## Warning: package 'corrplot' was built under R version 3.5.3
## corrplot 0.84 loaded
## Warning: package 'ggplot2' was built under R version 3.5.3
## Warning: package 'Hmisc' was built under R version 3.5.3
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 3.5.3
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Warning: package 'leaps' was built under R version 3.5.3
## Warning: package 'lubridate' was built under R version 3.5.3
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
## Warning: package 'MASS' was built under R version 3.5.3
## Warning: package 'pROC' was built under R version 3.5.3
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
## Warning: package 'ROCR' was built under R version 3.5.3
## Loading required package: gplots
## Warning: package 'gplots' was built under R version 3.5.3
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
## Warning: package 'tidyverse' was built under R version 3.5.3
## -- Attaching packages ----------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v tibble  2.1.3     v purrr   0.2.5
## v tidyr   0.8.2     v dplyr   0.8.3
## v readr   1.3.1     v stringr 1.3.1
## v tibble  2.1.3     v forcats 0.3.0
## Warning: package 'tibble' was built under R version 3.5.3
## Warning: package 'dplyr' was built under R version 3.5.3
## -- Conflicts -------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x lubridate::as.difftime() masks base::as.difftime()
## x lubridate::date()        masks base::date()
## x dplyr::filter()          masks stats::filter()
## x lubridate::intersect()   masks base::intersect()
## x dplyr::lag()             masks stats::lag()
## x dplyr::select()          masks MASS::select()
## x lubridate::setdiff()     masks base::setdiff()
## x dplyr::src()             masks Hmisc::src()
## x dplyr::summarize()       masks Hmisc::summarize()
## x lubridate::union()       masks base::union()

1 Données ‘training’ et ‘test’

1.1 Import

< Les prédicteurs sont de type entier ou numérique. < Le dataframe ne contient pas de données manquantes. < En revanche, les variables “Minute” et “Hour” sont à 0 pour toutes les observations. < Dans l’échantillon, les nombres de jours de pluie et de jours de beau temps sont à peu près équivalents.

## 'data.frame':    1244 obs. of  47 variables:
##  $ X                                           : int  1 3 5 7 9 11 13 15 17 19 ...
##  $ Year                                        : int  2010 2010 2010 2010 2010 2010 2010 2010 2010 2010 ...
##  $ Month                                       : int  6 6 6 6 6 6 6 6 6 6 ...
##  $ Day                                         : int  1 3 5 7 9 11 13 15 17 19 ...
##  $ Hour                                        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Minute                                      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Temperature.daily.mean..2.m.above.gnd.      : num  13.4 14.3 19.9 19.3 22.1 ...
##  $ Relative.Humidity.daily.mean..2.m.above.gnd.: num  77.9 83.6 66.6 72 74.3 ...
##  $ Mean.Sea.Level.Pressure.daily.mean..MSL.    : num  1016 1016 1018 1014 1005 ...
##  $ Total.Precipitation.daily.sum..sfc.         : num  0.1 0 0 0.4 2 1.4 5.1 8.2 4.8 5.3 ...
##  $ Snowfall.amount.raw.daily.sum..sfc.         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Total.Cloud.Cover.daily.mean..sfc.          : num  65 81 0 49.7 58.8 ...
##  $ High.Cloud.Cover.daily.mean..high.cld.lay.  : num  33.1 31.6 0 24.1 48.4 ...
##  $ Medium.Cloud.Cover.daily.mean..mid.cld.lay. : num  36.9 10.8 0 24.3 54.7 ...
##  $ Low.Cloud.Cover.daily.mean..low.cld.lay.    : num  45.1 80.5 0 49.2 22.2 ...
##  $ Sunshine.Duration.daily.sum..sfc.           : num  350.8 68.7 891.7 666.7 400.7 ...
##  $ Shortwave.Radiation.daily.sum..sfc.         : num  5722 3551 8284 7456 6532 ...
##  $ Wind.Speed.daily.mean..10.m.above.gnd.      : num  8.97 8.61 4.47 10.05 10.73 ...
##  $ Wind.Direction.daily.mean..10.m.above.gnd.  : num  281 221 172 259 177 ...
##  $ Wind.Speed.daily.mean..80.m.above.gnd.      : num  11.61 11.64 6.17 12.64 14.2 ...
##  $ Wind.Direction.daily.mean..80.m.above.gnd.  : num  280 228 194 281 173 ...
##  $ Wind.Speed.daily.mean..900.mb.              : num  14.6 22.9 10.6 19.9 16.6 ...
##  $ Wind.Direction.daily.mean..900.mb.          : num  318.3 75.3 167.1 276.3 207.8 ...
##  $ Wind.Gust.daily.mean..sfc.                  : num  12.21 12.76 5.57 16.94 11.99 ...
##  $ Temperature.daily.max..2.m.above.gnd.       : num  17.6 18.2 25.6 22.6 27.9 ...
##  $ Temperature.daily.min..2.m.above.gnd.       : num  8.1 10.3 12.3 15.3 15.5 ...
##  $ Relative.Humidity.daily.max..2.m.above.gnd. : int  96 96 88 91 89 97 96 96 97 94 ...
##  $ Relative.Humidity.daily.min..2.m.above.gnd. : int  61 71 47 54 49 61 78 75 78 69 ...
##  $ Mean.Sea.Level.Pressure.daily.max..MSL.     : num  1018 1018 1021 1016 1007 ...
##  $ Mean.Sea.Level.Pressure.daily.min..MSL.     : num  1013 1015 1016 1010 1003 ...
##  $ Total.Cloud.Cover.daily.max..sfc.           : num  100 100 0 100 100 100 100 100 100 100 ...
##  $ Total.Cloud.Cover.daily.min..sfc.           : num  1 3.3 0 0.3 3 0 0 0 100 1 ...
##  $ High.Cloud.Cover.daily.max..high.cld.lay.   : int  100 100 0 100 100 5 100 100 100 100 ...
##  $ High.Cloud.Cover.daily.min..high.cld.lay.   : int  0 0 0 0 7 0 0 0 0 0 ...
##  $ Medium.Cloud.Cover.daily.max..mid.cld.lay.  : int  100 100 0 100 100 100 100 100 100 100 ...
##  $ Medium.Cloud.Cover.daily.min..mid.cld.lay.  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Low.Cloud.Cover.daily.max..low.cld.lay.     : int  100 100 0 100 100 100 100 100 100 100 ...
##  $ Low.Cloud.Cover.daily.min..low.cld.lay.     : int  0 0 0 0 0 0 0 0 100 0 ...
##  $ Wind.Speed.daily.max..10.m.above.gnd.       : num  14.06 15.26 8.71 16.62 21.65 ...
##  $ Wind.Speed.daily.min..10.m.above.gnd.       : num  2.41 2.74 0.51 0.51 3.22 0.72 4.68 1.8 3.71 5.59 ...
##  $ Wind.Speed.daily.max..80.m.above.gnd.       : num  21.6 20.5 18.7 20.9 27.6 ...
##  $ Wind.Speed.daily.min..80.m.above.gnd.       : num  1.44 3.71 0.72 0.72 2.16 ...
##  $ Wind.Speed.daily.max..900.mb.               : num  28.8 40.8 22 41.4 25.9 ...
##  $ Wind.Speed.daily.min..900.mb.               : num  4.39 2.1 0.51 3.6 4.02 ...
##  $ Wind.Gust.daily.max..sfc.                   : num  23.4 24.1 15.8 32.8 27.7 ...
##  $ Wind.Gust.daily.min..sfc.                   : num  3.24 3.24 0.72 0.72 3.24 1.08 5.76 2.88 4.32 8.28 ...
##  $ pluie.demain                                : logi  TRUE FALSE TRUE TRUE FALSE TRUE ...
##        X               Year          Month             Day       
##  Min.   :   1.0   Min.   :2010   Min.   : 1.000   Min.   : 1.00  
##  1st Qu.: 700.5   1st Qu.:2012   1st Qu.: 4.000   1st Qu.: 8.00  
##  Median :1438.0   Median :2014   Median : 6.000   Median :16.00  
##  Mean   :1446.9   Mean   :2014   Mean   : 6.493   Mean   :15.73  
##  3rd Qu.:2183.5   3rd Qu.:2016   3rd Qu.: 9.000   3rd Qu.:23.00  
##  Max.   :2941.0   Max.   :2018   Max.   :12.000   Max.   :31.00  
##       Hour       Minute  Temperature.daily.mean..2.m.above.gnd.
##  Min.   :0   Min.   :0   Min.   :-7.100                        
##  1st Qu.:0   1st Qu.:0   1st Qu.: 6.695                        
##  Median :0   Median :0   Median :12.575                        
##  Mean   :0   Mean   :0   Mean   :12.227                        
##  3rd Qu.:0   3rd Qu.:0   3rd Qu.:17.640                        
##  Max.   :0   Max.   :0   Max.   :29.590                        
##  Relative.Humidity.daily.mean..2.m.above.gnd.
##  Min.   :41.12                               
##  1st Qu.:65.11                               
##  Median :71.88                               
##  Mean   :71.45                               
##  3rd Qu.:78.42                               
##  Max.   :95.25                               
##  Mean.Sea.Level.Pressure.daily.mean..MSL.
##  Min.   : 978.5                          
##  1st Qu.:1012.6                          
##  Median :1017.0                          
##  Mean   :1017.0                          
##  3rd Qu.:1021.6                          
##  Max.   :1038.6                          
##  Total.Precipitation.daily.sum..sfc. Snowfall.amount.raw.daily.sum..sfc.
##  Min.   : 0.000                      Min.   : 0.00000                   
##  1st Qu.: 0.000                      1st Qu.: 0.00000                   
##  Median : 0.100                      Median : 0.00000                   
##  Mean   : 2.213                      Mean   : 0.06156                   
##  3rd Qu.: 2.500                      3rd Qu.: 0.00000                   
##  Max.   :40.300                      Max.   :13.86000                   
##  Total.Cloud.Cover.daily.mean..sfc.
##  Min.   :  0.00                    
##  1st Qu.: 22.20                    
##  Median : 52.88                    
##  Mean   : 50.66                    
##  3rd Qu.: 78.34                    
##  Max.   :100.00                    
##  High.Cloud.Cover.daily.mean..high.cld.lay.
##  Min.   : 0.00                             
##  1st Qu.: 2.20                             
##  Median :13.23                             
##  Mean   :19.75                             
##  3rd Qu.:32.59                             
##  Max.   :94.54                             
##  Medium.Cloud.Cover.daily.mean..mid.cld.lay.
##  Min.   :  0.000                            
##  1st Qu.:  2.678                            
##  Median : 23.770                            
##  Mean   : 31.284                            
##  3rd Qu.: 54.710                            
##  Max.   :100.000                            
##  Low.Cloud.Cover.daily.mean..low.cld.lay.
##  Min.   :  0.00                          
##  1st Qu.:  8.49                          
##  Median : 38.70                          
##  Mean   : 39.68                          
##  3rd Qu.: 63.58                          
##  Max.   :100.00                          
##  Sunshine.Duration.daily.sum..sfc. Shortwave.Radiation.daily.sum..sfc.
##  Min.   :   0.0                    Min.   : 283.9                     
##  1st Qu.: 118.4                    1st Qu.:2059.5                     
##  Median : 371.5                    Median :3576.0                     
##  Mean   : 377.4                    Mean   :3945.7                     
##  3rd Qu.: 591.9                    3rd Qu.:5730.9                     
##  Max.   :1022.2                    Max.   :8337.5                     
##  Wind.Speed.daily.mean..10.m.above.gnd.
##  Min.   : 2.020                        
##  1st Qu.: 6.567                        
##  Median : 9.280                        
##  Mean   :10.869                        
##  3rd Qu.:13.400                        
##  Max.   :43.750                        
##  Wind.Direction.daily.mean..10.m.above.gnd.
##  Min.   : 34.51                            
##  1st Qu.:155.87                            
##  Median :208.99                            
##  Mean   :202.09                            
##  3rd Qu.:251.75                            
##  Max.   :337.65                            
##  Wind.Speed.daily.mean..80.m.above.gnd.
##  Min.   : 2.110                        
##  1st Qu.: 8.738                        
##  Median :12.375                        
##  Mean   :14.393                        
##  3rd Qu.:17.887                        
##  Max.   :54.620                        
##  Wind.Direction.daily.mean..80.m.above.gnd. Wind.Speed.daily.mean..900.mb.
##  Min.   : 36.64                             Min.   : 2.38                 
##  1st Qu.:158.84                             1st Qu.:13.39                 
##  Median :215.37                             Median :20.50                 
##  Mean   :207.04                             Mean   :25.05                 
##  3rd Qu.:257.23                             3rd Qu.:31.87                 
##  Max.   :342.42                             Max.   :97.69                 
##  Wind.Direction.daily.mean..900.mb. Wind.Gust.daily.mean..sfc.
##  Min.   : 29.22                     Min.   : 2.710            
##  1st Qu.:149.22                     1st Qu.: 9.643            
##  Median :237.40                     Median :13.835            
##  Mean   :208.69                     Mean   :16.791            
##  3rd Qu.:265.66                     3rd Qu.:21.210            
##  Max.   :343.97                     Max.   :70.020            
##  Temperature.daily.max..2.m.above.gnd.
##  Min.   :-4.39                        
##  1st Qu.:10.65                        
##  Median :16.70                        
##  Mean   :16.48                        
##  3rd Qu.:22.45                        
##  Max.   :36.22                        
##  Temperature.daily.min..2.m.above.gnd.
##  Min.   :-11.650                      
##  1st Qu.:  2.998                      
##  Median :  8.255                      
##  Mean   :  8.051                      
##  3rd Qu.: 13.072                      
##  Max.   : 23.360                      
##  Relative.Humidity.daily.max..2.m.above.gnd.
##  Min.   : 55.0                              
##  1st Qu.: 83.0                              
##  Median : 89.0                              
##  Mean   : 87.9                              
##  3rd Qu.: 94.0                              
##  Max.   :100.0                              
##  Relative.Humidity.daily.min..2.m.above.gnd.
##  Min.   :21.0                               
##  1st Qu.:45.0                               
##  Median :54.0                               
##  Mean   :54.2                               
##  3rd Qu.:63.0                               
##  Max.   :93.0                               
##  Mean.Sea.Level.Pressure.daily.max..MSL.
##  Min.   : 981.4                         
##  1st Qu.:1015.7                         
##  Median :1019.5                         
##  Mean   :1019.8                         
##  3rd Qu.:1024.4                         
##  Max.   :1041.8                         
##  Mean.Sea.Level.Pressure.daily.min..MSL. Total.Cloud.Cover.daily.max..sfc.
##  Min.   : 976.1                          Min.   :  0.00                   
##  1st Qu.:1009.4                          1st Qu.:100.00                   
##  Median :1014.4                          Median :100.00                   
##  Mean   :1014.2                          Mean   : 89.08                   
##  3rd Qu.:1019.3                          3rd Qu.:100.00                   
##  Max.   :1037.4                          Max.   :100.00                   
##  Total.Cloud.Cover.daily.min..sfc.
##  Min.   :  0.0                    
##  1st Qu.:  0.0                    
##  Median :  0.0                    
##  Mean   :  8.5                    
##  3rd Qu.:  2.1                    
##  Max.   :100.0                    
##  High.Cloud.Cover.daily.max..high.cld.lay.
##  Min.   :  0.00                           
##  1st Qu.: 18.00                           
##  Median :100.00                           
##  Mean   : 61.99                           
##  3rd Qu.:100.00                           
##  Max.   :100.00                           
##  High.Cloud.Cover.daily.min..high.cld.lay.
##  Min.   : 0.0000                          
##  1st Qu.: 0.0000                          
##  Median : 0.0000                          
##  Mean   : 0.7146                          
##  3rd Qu.: 0.0000                          
##  Max.   :45.0000                          
##  Medium.Cloud.Cover.daily.max..mid.cld.lay.
##  Min.   :  0.00                            
##  1st Qu.: 34.75                            
##  Median :100.00                            
##  Mean   : 72.83                            
##  3rd Qu.:100.00                            
##  Max.   :100.00                            
##  Medium.Cloud.Cover.daily.min..mid.cld.lay.
##  Min.   :  0.000                           
##  1st Qu.:  0.000                           
##  Median :  0.000                           
##  Mean   :  1.953                           
##  3rd Qu.:  0.000                           
##  Max.   :100.000                           
##  Low.Cloud.Cover.daily.max..low.cld.lay.
##  Min.   :  0.00                         
##  1st Qu.:100.00                         
##  Median :100.00                         
##  Mean   : 80.85                         
##  3rd Qu.:100.00                         
##  Max.   :100.00                         
##  Low.Cloud.Cover.daily.min..low.cld.lay.
##  Min.   :  0.000                        
##  1st Qu.:  0.000                        
##  Median :  0.000                        
##  Mean   :  4.551                        
##  3rd Qu.:  0.000                        
##  Max.   :100.000                        
##  Wind.Speed.daily.max..10.m.above.gnd.
##  Min.   : 4.61                        
##  1st Qu.:12.31                        
##  Median :17.33                        
##  Mean   :19.22                        
##  3rd Qu.:23.88                        
##  Max.   :69.42                        
##  Wind.Speed.daily.min..10.m.above.gnd.
##  Min.   : 0.000                       
##  1st Qu.: 1.080                       
##  Median : 2.310                       
##  Mean   : 3.681                       
##  3rd Qu.: 4.800                       
##  Max.   :36.040                       
##  Wind.Speed.daily.max..80.m.above.gnd.
##  Min.   : 4.84                        
##  1st Qu.:18.19                        
##  Median :23.82                        
##  Mean   :25.45                        
##  3rd Qu.:29.92                        
##  Max.   :79.78                        
##  Wind.Speed.daily.min..80.m.above.gnd. Wind.Speed.daily.max..900.mb.
##  Min.   : 0.000                        Min.   :  6.13               
##  1st Qu.: 1.125                        1st Qu.: 25.18               
##  Median : 2.550                        Median : 37.94               
##  Mean   : 4.901                        Mean   : 42.26               
##  3rd Qu.: 6.130                        3rd Qu.: 55.51               
##  Max.   :46.130                        Max.   :124.20               
##  Wind.Speed.daily.min..900.mb. Wind.Gust.daily.max..sfc.
##  Min.   : 0.00                 Min.   : 5.04            
##  1st Qu.: 3.26                 1st Qu.:19.08            
##  Median : 7.15                 Median :25.92            
##  Mean   :11.37                 Mean   :29.49            
##  3rd Qu.:14.69                 3rd Qu.:36.36            
##  Max.   :82.07                 Max.   :97.92            
##  Wind.Gust.daily.min..sfc. pluie.demain   
##  Min.   : 0.000            Mode :logical  
##  1st Qu.: 2.160            FALSE:625      
##  Median : 3.960            TRUE :619      
##  Mean   : 6.655                           
##  3rd Qu.: 8.640                           
##  Max.   :51.120
## [1] "Part des jours de pluie"
## 
##     FALSE      TRUE 
## 0.5024116 0.4975884

1.2 Préparation

% unite(col = "Date", Year, Month, Day, sep = "-") %>% dplyr::select(-X, -Hour, -Minute)
train <- train %>% mutate(Date = ymd(Date))
#%%%%% Data Test %%%%%#
test$Day <- ifelse(nchar(as.character(test$Day)) == 1, paste("0", as.character(test$Day), sep = ""), as.character(test$Day))
test$Month <- ifelse( nchar(as.character(test$Month)) == 1, paste("0", as.character(test$Month), sep = ""), as.character(test$Month) )
test$Mois <- as.factor(test$Month)# la variable mois est conservée, de type factor, pour la modélisation.
test <- test %>% unite(col = "Date", Year, Month, Day, sep = "-") %>% dplyr::select(-X, -Hour, -Minute)
test <- test %>% mutate(Date = ymd(Date))

2 Exploration des données

2.1 Nuage de points

< En première analyse, la pression atmosphérique et la vitesse du vent pourraient nous aider à prédire la survenue de la pluie.

# Scatter plot of each plotted column of `train` against `Date`, coloured by
# the target `pluie.demain`; one plot is printed per column.
# The columns visited are 2 .. (ncol(train) - 2), exactly as before.
# NOTE(review): presumably column 1 is `Date` and the last two columns are the
# target and the month factor -- confirm against the data-preparation step.
# seq_len() yields an empty loop when `train` has too few columns, whereas
# `2:(ncol(train) - 2)` would silently count downwards.
for (i in seq_len(max(ncol(train) - 3L, 0L)) + 1L) {
  # `.data[[name]]` looks the column up in the plot data instead of capturing
  # a vector from the global `train` inside aes().
  p <- ggplot(
    data = train,
    mapping = aes(
      x = Date,
      y = .data[[colnames(train)[i]]],
      col = pluie.demain
    )
  ) +
    geom_point() +
    labs(x = "Date", y = colnames(train)[i])
  print(p)
}

2.2 Distribution en fonction de la variable cible

< Au regard de leur distribution, la couverture nuageuse pourrait aussi expliquer la survenue de la pluie. La direction du vent, les rafales de vent ou l’humidité relative pourraient aussi jouer un rôle.

# For each plotted column of `train`, print two views of its distribution
# split by the target `pluie.demain`:
#   * a lattice histogram in percent, one panel per value of the target;
#   * a ggplot2 density plot, one semi-transparent filled curve per value.
# The columns visited are 2 .. (ncol(train) - 3), exactly as before.
# NOTE(review): the scatter-plot loop and the correlation matrix in this file
# stop at ncol(train) - 2, so this loop skips one more column -- confirm
# whether that is intended.
# seq_len() yields an empty loop when `train` has too few columns, whereas
# `2:(ncol(train) - 3)` would silently count downwards.
for (i in seq_len(max(ncol(train) - 4L, 0L)) + 1L) {
  # `[[i]]` extracts the column as a plain vector for data frames and tibbles
  # alike.
  p <- histogram(
    ~ train[[i]] | pluie.demain,
    data = train,
    type = "percent",
    col = "grey",
    xlab = colnames(train)[i],
    breaks = 10
  )
  # `.data[[name]]` looks the column up in the plot data instead of capturing
  # a vector from the global `train` inside aes().
  q <- ggplot(train, aes(x = .data[[colnames(train)[i]]], fill = pluie.demain)) +
    labs(
      title = paste("Distribution de la variable", colnames(train)[i], sep = " "),
      x = colnames(train)[i],
      y = "Fréquence",
      fill = "Couleur",
      subtitle = "Histogramme de distribution"
    ) +
    geom_density(alpha = 0.4) + # semi-transparent fill so curves stay visible
    # Show the legend keys fully opaque even though the curves are not.
    guides(fill = guide_legend(override.aes = list(alpha = 1)))
  print(p)
  print(q)
}

# Remove the loop temporaries; only names that exist are passed to rm() so an
# empty loop does not trigger "object not found" warnings.
rm(list = base::intersect(c("p", "q", "i"), ls()))

2.3 Corrélation linéaire des variables deux à deux

< Les coefficients de corrélation sont élevés et positifs entre variables mesurant force du vent et rafales. < Les corrélations linéaires sont négatives entre, d’une part, les minutes d’ensoleillement ou le rayonnement solaire et d’autre part, la nébulosité. < Il faut s’attendre à des problèmes de colinéarité.

# Pairwise linear correlations between columns 2 .. (ncol(train) - 2) of
# `train`, displayed as the lower triangle of a corrplot with the variables
# ordered by hierarchical clustering.
predictor_matrix <- data.matrix(train[, 2:(ncol(train) - 2)])
correlations <- cor(predictor_matrix)

corrplot(
  corr = correlations,
  type = "lower",
  order = "hclust",
  tl.col = "black", # label colour
  tl.srt = 3,       # label rotation
  tl.cex = 0.55     # label size
)

# Drop the temporaries so they do not linger in the workspace.
rm(predictor_matrix, correlations)

2.4 Echantillonnage pour la validation croisée

##                                            Pr(>|z|)    
## (Intercept)                                4.42e-13 ***
## Mean.Sea.Level.Pressure.daily.min..MSL.    0.054997 .  
## Medium.Cloud.Cover.daily.max..mid.cld.lay. 0.000293 ***
## Wind.Direction.daily.mean..900.mb.         3.81e-05 ***
## Mois02                                     0.000729 ***
## Mois03                                     0.001700 ** 
## Mois04                                     8.62e-05 ***
## Mois05                                     0.066416 .  
## Mois06                                     0.363604    
## Mois07                                     0.111875    
## Mois08                                     0.012401 *  
## Mois09                                     0.004433 ** 
## Mois10                                     0.002350 ** 
## Mois11                                     0.002824 ** 
## Mois12                                     0.206446    
## High.Cloud.Cover.daily.mean..high.cld.lay. 0.056760 .  
## Mean.Sea.Level.Pressure.daily.max..MSL.    0.018443 *  
## High.Cloud.Cover.daily.min..high.cld.lay.  0.043751 *  
## Temperature.daily.max..2.m.above.gnd.      0.000492 ***
## Temperature.daily.min..2.m.above.gnd.      0.002182 ** 
## Total.Cloud.Cover.daily.min..sfc.          0.003122 ** 
## Low.Cloud.Cover.daily.min..low.cld.lay.    0.018938 *  
## Mean.Sea.Level.Pressure.daily.mean..MSL.   0.120894    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1152.01  on 831  degrees of freedom
## Residual deviance:  897.39  on 809  degrees of freedom
## AIC: 943.39
## 
## Number of Fisher Scoring iterations: 4

# Deviance analysis
# First call: likelihood-ratio test of the null model against model 1, i.e.
# the upper chi-square tail of the difference between the two deviances, with
# the difference of their degrees of freedom.
# `lower.tail = FALSE` is spelled out: `lower = F` relied on partial argument
# matching and on `F`, which is an ordinary variable that can be reassigned.
# NOTE(review): the deviances and degrees of freedom below are hard-coded.
# They use two different residual deviances (942.68 here, 942.53 further
# down) and do not match the summary printed just above this chunk (null
# deviance 1152.01 on 831 df, residual deviance 897.39 on 809 df) --
# presumably copied from an earlier run; confirm, or read them from the
# fitted model object instead.
pchisq(1226.68 - 942.68, 884 - 865, lower.tail = FALSE)
## [1] 3.755128e-49
print("Test du rapport de vraisemblance du modèle nul contre le modèle 1")
## [1] "Test du rapport de vraisemblance du modèle nul contre le modèle 1"
# Very small p-value: the model without covariates is rejected. The model is useful.
print("Test du rapport de vraisemblance du modèle saturé contre le modèle 1")
## [1] "Test du rapport de vraisemblance du modèle saturé contre le modèle 1"
pchisq(942.53, 865, lower.tail = FALSE) # The p-value is small. The model needs to be improved.
## [1] 0.03385231
###############################################################################################################
#                                                                                                             #
# Model 2: all numeric variables, backward stepwise selection using the AIC criterion                         #
#                                                                                                             #
###############################################################################################################

# Starting point: logistic regression of `pluie.demain` on every column of
# `train[d, ]` except `Date`. stepAIC() then removes terms one at a time
# (direction = "backward").
# NOTE(review): `d` is defined outside this excerpt -- presumably the row
# indices of the training fold; confirm.
# `trace = FALSE` replaces `trace = F`, since `F` can be reassigned.
model2 <- stepAIC(
  glm(
    pluie.demain ~ . - Date,
    data = train[d, ],
    family = binomial(link = "logit")
  ),
  direction = "backward",
  trace = FALSE
)
summary(model2)
## 
## Call:
## glm(formula = pluie.demain ~ Temperature.daily.mean..2.m.above.gnd. + 
##     High.Cloud.Cover.daily.mean..high.cld.lay. + Wind.Direction.daily.mean..10.m.above.gnd. + 
##     Wind.Speed.daily.mean..80.m.above.gnd. + Wind.Direction.daily.mean..80.m.above.gnd. + 
##     Wind.Speed.daily.mean..900.mb. + Wind.Direction.daily.mean..900.mb. + 
##     Temperature.daily.min..2.m.above.gnd. + Mean.Sea.Level.Pressure.daily.max..MSL. + 
##     Mean.Sea.Level.Pressure.daily.min..MSL. + Total.Cloud.Cover.daily.min..sfc. + 
##     High.Cloud.Cover.daily.min..high.cld.lay. + Medium.Cloud.Cover.daily.max..mid.cld.lay. + 
##     Low.Cloud.Cover.daily.min..low.cld.lay. + Wind.Speed.daily.min..10.m.above.gnd. + 
##     Wind.Speed.daily.min..900.mb. + Mois, family = binomial(link = "logit"), 
##     data = train[d, ])
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.3466  -0.8668  -0.3001   0.8803   2.6061  
## 
## Coefficients:
##                                              Estimate Std. Error z value
## (Intercept)                                107.267765  15.031133   7.136
## Temperature.daily.mean..2.m.above.gnd.       0.239479   0.066819   3.584
## High.Cloud.Cover.daily.mean..high.cld.lay.   0.010413   0.005471   1.903
## Wind.Direction.daily.mean..10.m.above.gnd.   0.012722   0.006345   2.005
## Wind.Speed.daily.mean..80.m.above.gnd.      -0.071261   0.025919  -2.749
## Wind.Direction.daily.mean..80.m.above.gnd.  -0.012275   0.006622  -1.854
## Wind.Speed.daily.mean..900.mb.               0.034900   0.015659   2.229
## Wind.Direction.daily.mean..900.mb.           0.006376   0.001650   3.864
## Temperature.daily.min..2.m.above.gnd.       -0.241822   0.068492  -3.531
## Mean.Sea.Level.Pressure.daily.max..MSL.     -0.063072   0.030341  -2.079
## Mean.Sea.Level.Pressure.daily.min..MSL.     -0.044340   0.028104  -1.578
## Total.Cloud.Cover.daily.min..sfc.            0.026575   0.009035   2.941
## High.Cloud.Cover.daily.min..high.cld.lay.   -0.052954   0.028783  -1.840
## Medium.Cloud.Cover.daily.max..mid.cld.lay.   0.009602   0.002671   3.595
## Low.Cloud.Cover.daily.min..low.cld.lay.     -0.022741   0.009623  -2.363
## Wind.Speed.daily.min..10.m.above.gnd.        0.086899   0.037099   2.342
## Wind.Speed.daily.min..900.mb.               -0.032693   0.017435  -1.875
## Mois02                                      -1.526803   0.449486  -3.397
## Mois03                                      -1.506890   0.471192  -3.198
## Mois04                                      -1.949896   0.505119  -3.860
## Mois05                                      -1.099664   0.527470  -2.085
## Mois06                                      -0.680814   0.566494  -1.202
## Mois07                                      -1.043724   0.601652  -1.735
## Mois08                                      -1.595593   0.593558  -2.688
## Mois09                                      -1.638347   0.550473  -2.976
## Mois10                                      -1.416564   0.459614  -3.082
## Mois11                                      -1.374426   0.447773  -3.069
## Mois12                                      -0.465689   0.444194  -1.048
##                                            Pr(>|z|)    
## (Intercept)                                9.58e-13 ***
## Temperature.daily.mean..2.m.above.gnd.     0.000338 ***
## High.Cloud.Cover.daily.mean..high.cld.lay. 0.057000 .  
## Wind.Direction.daily.mean..10.m.above.gnd. 0.044961 *  
## Wind.Speed.daily.mean..80.m.above.gnd.     0.005970 ** 
## Wind.Direction.daily.mean..80.m.above.gnd. 0.063810 .  
## Wind.Speed.daily.mean..900.mb.             0.025829 *  
## Wind.Direction.daily.mean..900.mb.         0.000112 ***
## Temperature.daily.min..2.m.above.gnd.      0.000415 ***
## Mean.Sea.Level.Pressure.daily.max..MSL.    0.037639 *  
## Mean.Sea.Level.Pressure.daily.min..MSL.    0.114626    
## Total.Cloud.Cover.daily.min..sfc.          0.003268 ** 
## High.Cloud.Cover.daily.min..high.cld.lay.  0.065801 .  
## Medium.Cloud.Cover.daily.max..mid.cld.lay. 0.000325 ***
## Low.Cloud.Cover.daily.min..low.cld.lay.    0.018115 *  
## Wind.Speed.daily.min..10.m.above.gnd.      0.019162 *  
## Wind.Speed.daily.min..900.mb.              0.060780 .  
## Mois02                                     0.000682 ***
## Mois03                                     0.001384 ** 
## Mois04                                     0.000113 ***
## Mois05                                     0.037088 *  
## Mois06                                     0.229440    
## Mois07                                     0.082782 .  
## Mois08                                     0.007184 ** 
## Mois09                                     0.002918 ** 
## Mois10                                     0.002056 ** 
## Mois11                                     0.002144 ** 
## Mois12                                     0.294459    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1152.01  on 831  degrees of freedom
## Residual deviance:  886.83  on 804  degrees of freedom
## AIC: 942.83
## 
## Number of Fisher Scoring iterations: 4
# Test of the saturated model against model 2: upper chi-square tail of a
# residual deviance with its residual degrees of freedom.
# `lower.tail = FALSE` is spelled out: `lower = F` relied on partial argument
# matching and on `F`, which is an ordinary variable that can be reassigned.
# NOTE(review): 919.25 on 859 df is hard-coded and does not match the summary
# of model2 printed above (residual deviance 886.83 on 804 df) -- presumably
# copied from an earlier run; confirm, or use deviance(model2) and
# df.residual(model2) instead.
print("Test du rapport de vraisemblance du modèle saturé contre le modèle 2")
## [1] "Test du rapport de vraisemblance du modèle saturé contre le modèle 2"
pchisq(919.25, 859, lower.tail = FALSE) # The p-value is still small (7.5%). The model can probably be improved.
## [1] 0.0753681
###############################################################################################################
#                                                                                                             #
#         Model 3: dummy variables added, backward stepwise selection using the AIC criterion                 #
#                 indicator variables chosen from the analysis of the distributions                           #
###############################################################################################################

# Same starting model as model 2 (every column of `train[d, ]` except `Date`)
# plus eight threshold indicators built with I(); stepAIC() then removes terms
# one at a time (direction = "backward").
# NOTE(review): `d` is defined outside this excerpt -- presumably the row
# indices of the training fold; confirm.
# `trace = FALSE` replaces `trace = F`, since `F` can be reassigned.
model3 <- stepAIC(
  glm(
    pluie.demain ~ . - Date +
      I(Mean.Sea.Level.Pressure.daily.max..MSL. < 1015) +
      I(Total.Precipitation.daily.sum..sfc. > 1) +
      I(Snowfall.amount.raw.daily.sum..sfc. > 1) +
      I(Total.Cloud.Cover.daily.mean..sfc. > 50) +
      I(High.Cloud.Cover.daily.mean..high.cld.lay. > 15) +
      I(Sunshine.Duration.daily.sum..sfc. < 250) +
      I(Wind.Direction.daily.mean..10.m.above.gnd. > 150) +
      I(Wind.Speed.daily.mean..900.mb. < 15),
    data = train[d, ],
    family = binomial(link = "logit")
  ),
  direction = "backward",
  trace = FALSE
)
summary(model3)
## 
## Call:
## glm(formula = pluie.demain ~ Total.Cloud.Cover.daily.mean..sfc. + 
##     High.Cloud.Cover.daily.mean..high.cld.lay. + Low.Cloud.Cover.daily.mean..low.cld.lay. + 
##     Wind.Direction.daily.mean..10.m.above.gnd. + Wind.Speed.daily.mean..80.m.above.gnd. + 
##     Wind.Direction.daily.mean..80.m.above.gnd. + Wind.Speed.daily.mean..900.mb. + 
##     Wind.Direction.daily.mean..900.mb. + Temperature.daily.max..2.m.above.gnd. + 
##     Temperature.daily.min..2.m.above.gnd. + Mean.Sea.Level.Pressure.daily.max..MSL. + 
##     Total.Cloud.Cover.daily.min..sfc. + High.Cloud.Cover.daily.min..high.cld.lay. + 
##     Medium.Cloud.Cover.daily.max..mid.cld.lay. + Low.Cloud.Cover.daily.min..low.cld.lay. + 
##     Wind.Speed.daily.min..10.m.above.gnd. + Wind.Speed.daily.min..900.mb. + 
##     Mois + I(Sunshine.Duration.daily.sum..sfc. < 250), family = binomial(link = "logit"), 
##     data = train[d, ])
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.6053  -0.8678  -0.2845   0.8925   2.4198  
## 
## Coefficients:
##                                                  Estimate Std. Error
## (Intercept)                                    107.278492  15.243286
## Total.Cloud.Cover.daily.mean..sfc.              -0.021589   0.010224
## High.Cloud.Cover.daily.mean..high.cld.lay.       0.013312   0.006478
## Low.Cloud.Cover.daily.mean..low.cld.lay.         0.017451   0.008261
## Wind.Direction.daily.mean..10.m.above.gnd.       0.013548   0.006449
## Wind.Speed.daily.mean..80.m.above.gnd.          -0.073401   0.025705
## Wind.Direction.daily.mean..80.m.above.gnd.      -0.012960   0.006702
## Wind.Speed.daily.mean..900.mb.                   0.037726   0.015500
## Wind.Direction.daily.mean..900.mb.               0.006859   0.001661
## Temperature.daily.max..2.m.above.gnd.            0.160613   0.044898
## Temperature.daily.min..2.m.above.gnd.           -0.170790   0.048569
## Mean.Sea.Level.Pressure.daily.max..MSL.         -0.107728   0.014841
## Total.Cloud.Cover.daily.min..sfc.                0.024680   0.009438
## High.Cloud.Cover.daily.min..high.cld.lay.       -0.049467   0.028970
## Medium.Cloud.Cover.daily.max..mid.cld.lay.       0.012472   0.003045
## Low.Cloud.Cover.daily.min..low.cld.lay.         -0.024118   0.009856
## Wind.Speed.daily.min..10.m.above.gnd.            0.087502   0.037367
## Wind.Speed.daily.min..900.mb.                   -0.034248   0.017190
## Mois02                                          -1.486047   0.451934
## Mois03                                          -1.536682   0.479477
## Mois04                                          -1.852143   0.508465
## Mois05                                          -1.010525   0.533413
## Mois06                                          -0.564789   0.567759
## Mois07                                          -0.935005   0.599852
## Mois08                                          -1.498265   0.595950
## Mois09                                          -1.548971   0.552013
## Mois10                                          -1.348912   0.465607
## Mois11                                          -1.347134   0.450773
## Mois12                                          -0.482738   0.445234
## I(Sunshine.Duration.daily.sum..sfc. < 250)TRUE   0.573656   0.287425
##                                                z value Pr(>|z|)    
## (Intercept)                                      7.038 1.95e-12 ***
## Total.Cloud.Cover.daily.mean..sfc.              -2.112 0.034713 *  
## High.Cloud.Cover.daily.mean..high.cld.lay.       2.055 0.039895 *  
## Low.Cloud.Cover.daily.mean..low.cld.lay.         2.112 0.034653 *  
## Wind.Direction.daily.mean..10.m.above.gnd.       2.101 0.035672 *  
## Wind.Speed.daily.mean..80.m.above.gnd.          -2.856 0.004296 ** 
## Wind.Direction.daily.mean..80.m.above.gnd.      -1.934 0.053132 .  
## Wind.Speed.daily.mean..900.mb.                   2.434 0.014937 *  
## Wind.Direction.daily.mean..900.mb.               4.129 3.64e-05 ***
## Temperature.daily.max..2.m.above.gnd.            3.577 0.000347 ***
## Temperature.daily.min..2.m.above.gnd.           -3.516 0.000437 ***
## Mean.Sea.Level.Pressure.daily.max..MSL.         -7.259 3.91e-13 ***
## Total.Cloud.Cover.daily.min..sfc.                2.615 0.008920 ** 
## High.Cloud.Cover.daily.min..high.cld.lay.       -1.708 0.087724 .  
## Medium.Cloud.Cover.daily.max..mid.cld.lay.       4.096 4.20e-05 ***
## Low.Cloud.Cover.daily.min..low.cld.lay.         -2.447 0.014409 *  
## Wind.Speed.daily.min..10.m.above.gnd.            2.342 0.019196 *  
## Wind.Speed.daily.min..900.mb.                   -1.992 0.046338 *  
## Mois02                                          -3.288 0.001008 ** 
## Mois03                                          -3.205 0.001351 ** 
## Mois04                                          -3.643 0.000270 ***
## Mois05                                          -1.894 0.058165 .  
## Mois06                                          -0.995 0.319849    
## Mois07                                          -1.559 0.119061    
## Mois08                                          -2.514 0.011934 *  
## Mois09                                          -2.806 0.005015 ** 
## Mois10                                          -2.897 0.003766 ** 
## Mois11                                          -2.988 0.002804 ** 
## Mois12                                          -1.084 0.278260    
## I(Sunshine.Duration.daily.sum..sfc. < 250)TRUE   1.996 0.045950 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1152.01  on 831  degrees of freedom
## Residual deviance:  881.35  on 802  degrees of freedom
## AIC: 941.35
## 
## Number of Fisher Scoring iterations: 5
print("Test du rapport de vraisemblance du modèle saturé contre le modèle 3")
## [1] "Test du rapport de vraisemblance du modèle saturé contre le modèle 3"
# Upper-tail chi-squared probability of the statistic 914.85 on 857 degrees of
# freedom. The p-value (about 8.3%) improves slightly.
# NOTE(review): the statistic and degrees of freedom are hard-coded and differ
# from the residual deviance printed by summary(model3) in this transcript
# (881.35 on 802 df). Consider deviance(model3) and df.residual(model3) so the
# test always follows the fitted model -- confirm where 914.85 / 857 come from.
pchisq(914.85, df = 857, lower.tail = FALSE)
## [1] 0.08331966
###############################################################################################################
#                                                                                                             #
#                    Modele 4 : Idem mais modélisation progressive avec le critère AIC                        #
#                                                                                                             #
###############################################################################################################

# Upper scope for model 4: the columns of train selected by predicteurs, the
# Mois factor, and the logical threshold indicators. The right-hand side is
# assembled as text because the predictor names are only known at run time.
formule4 <- as.formula(paste(
  "pluie.demain ~",
  paste(
    c(
      names(train[, predicteurs]),
      "Mois",
      "I(Total.Precipitation.daily.sum..sfc. > 1)",
      "I(Snowfall.amount.raw.daily.sum..sfc. < 1)",
      "I(Total.Cloud.Cover.daily.mean..sfc. > 50)",
      "I(High.Cloud.Cover.daily.mean..high.cld.lay. > 15)",
      "I(Sunshine.Duration.daily.sum..sfc. < 250)",
      "I(Wind.Speed.daily.mean..900.mb. < 15)",
      "I(Mean.Sea.Level.Pressure.daily.max..MSL. < 1015)"
    ),
    collapse = " + "
  )
))

# Model 4: stepwise selection on AIC in both directions, starting from the
# intercept-only logistic model and allowed to grow up to formule4.
# (The likelihood-ratio banner that used to be printed here, before the model
# was even fitted, was a stray duplicate; it is printed next to the test itself.)
model4 <- stepAIC(
  glm(pluie.demain ~ 1, data = train[d, ], family = binomial(link = "logit")),
  direction = "both",
  scope = list(upper = formule4),
  trace = FALSE # spelled out: `F` is an ordinary variable that can be reassigned
)
summary(model4)
## 
## Call:
## glm(formula = pluie.demain ~ Mean.Sea.Level.Pressure.daily.min..MSL. + 
##     Medium.Cloud.Cover.daily.max..mid.cld.lay. + Wind.Direction.daily.mean..900.mb. + 
##     Mois + I(High.Cloud.Cover.daily.mean..high.cld.lay. > 15) + 
##     Mean.Sea.Level.Pressure.daily.max..MSL. + Mean.Sea.Level.Pressure.daily.mean..MSL. + 
##     I(Sunshine.Duration.daily.sum..sfc. < 250) + Total.Cloud.Cover.daily.mean..sfc. + 
##     Total.Cloud.Cover.daily.min..sfc. + Low.Cloud.Cover.daily.min..low.cld.lay. + 
##     High.Cloud.Cover.daily.min..high.cld.lay., family = binomial(link = "logit"), 
##     data = train[d, ])
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.3589  -0.8674  -0.3324   0.8954   2.3333  
## 
## Coefficients:
##                                                          Estimate
## (Intercept)                                            108.457661
## Mean.Sea.Level.Pressure.daily.min..MSL.                 -0.154413
## Medium.Cloud.Cover.daily.max..mid.cld.lay.               0.010383
## Wind.Direction.daily.mean..900.mb.                       0.004311
## Mois02                                                  -1.392205
## Mois03                                                  -1.153179
## Mois04                                                  -1.456125
## Mois05                                                  -0.529835
## Mois06                                                  -0.194013
## Mois07                                                  -0.644206
## Mois08                                                  -1.154428
## Mois09                                                  -1.215988
## Mois10                                                  -1.153747
## Mois11                                                  -1.297263
## Mois12                                                  -0.583244
## I(High.Cloud.Cover.daily.mean..high.cld.lay. > 15)TRUE   0.410521
## Mean.Sea.Level.Pressure.daily.max..MSL.                 -0.166900
## Mean.Sea.Level.Pressure.daily.mean..MSL.                 0.214020
## I(Sunshine.Duration.daily.sum..sfc. < 250)TRUE           0.655360
## Total.Cloud.Cover.daily.mean..sfc.                      -0.013517
## Total.Cloud.Cover.daily.min..sfc.                        0.026176
## Low.Cloud.Cover.daily.min..low.cld.lay.                 -0.024667
## High.Cloud.Cover.daily.min..high.cld.lay.               -0.041816
##                                                        Std. Error z value
## (Intercept)                                             14.642710   7.407
## Mean.Sea.Level.Pressure.daily.min..MSL.                  0.075055  -2.057
## Medium.Cloud.Cover.daily.max..mid.cld.lay.               0.002866   3.623
## Wind.Direction.daily.mean..900.mb.                       0.001233   3.497
## Mois02                                                   0.441715  -3.152
## Mois03                                                   0.443031  -2.603
## Mois04                                                   0.435937  -3.340
## Mois05                                                   0.414463  -1.278
## Mois06                                                   0.413740  -0.469
## Mois07                                                   0.421459  -1.529
## Mois08                                                   0.417413  -2.766
## Mois09                                                   0.435429  -2.793
## Mois10                                                   0.404233  -2.854
## Mois11                                                   0.426787  -3.040
## Mois12                                                   0.433197  -1.346
## I(High.Cloud.Cover.daily.mean..high.cld.lay. > 15)TRUE   0.196005   2.094
## Mean.Sea.Level.Pressure.daily.max..MSL.                  0.074909  -2.228
## Mean.Sea.Level.Pressure.daily.mean..MSL.                 0.139011   1.540
## I(Sunshine.Duration.daily.sum..sfc. < 250)TRUE           0.277350   2.363
## Total.Cloud.Cover.daily.mean..sfc.                       0.005338  -2.532
## Total.Cloud.Cover.daily.min..sfc.                        0.009237   2.834
## Low.Cloud.Cover.daily.min..low.cld.lay.                  0.009471  -2.605
## High.Cloud.Cover.daily.min..high.cld.lay.                0.026467  -1.580
##                                                        Pr(>|z|)    
## (Intercept)                                            1.29e-13 ***
## Mean.Sea.Level.Pressure.daily.min..MSL.                0.039654 *  
## Medium.Cloud.Cover.daily.max..mid.cld.lay.             0.000291 ***
## Wind.Direction.daily.mean..900.mb.                     0.000471 ***
## Mois02                                                 0.001623 ** 
## Mois03                                                 0.009243 ** 
## Mois04                                                 0.000837 ***
## Mois05                                                 0.201121    
## Mois06                                                 0.639123    
## Mois07                                                 0.126385    
## Mois08                                                 0.005680 ** 
## Mois09                                                 0.005228 ** 
## Mois10                                                 0.004315 ** 
## Mois11                                                 0.002369 ** 
## Mois12                                                 0.178182    
## I(High.Cloud.Cover.daily.mean..high.cld.lay. > 15)TRUE 0.036220 *  
## Mean.Sea.Level.Pressure.daily.max..MSL.                0.025879 *  
## Mean.Sea.Level.Pressure.daily.mean..MSL.               0.123661    
## I(Sunshine.Duration.daily.sum..sfc. < 250)TRUE         0.018131 *  
## Total.Cloud.Cover.daily.mean..sfc.                     0.011331 *  
## Total.Cloud.Cover.daily.min..sfc.                      0.004598 ** 
## Low.Cloud.Cover.daily.min..low.cld.lay.                0.009199 ** 
## High.Cloud.Cover.daily.min..high.cld.lay.              0.114117    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1152.01  on 831  degrees of freedom
## Residual deviance:  903.21  on 809  degrees of freedom
## AIC: 949.21
## 
## Number of Fisher Scoring iterations: 4
print("Test du rapport de vraisemblance du modèle saturé contre le modèle 4")
## [1] "Test du rapport de vraisemblance du modèle saturé contre le modèle 4"
# Upper-tail chi-squared probability of the statistic 942.53 on 865 degrees of
# freedom.
# NOTE(review): the statistic and degrees of freedom are hard-coded and differ
# from the residual deviance printed by summary(model4) in this transcript
# (903.21 on 809 df). Consider deviance(model4) and df.residual(model4) so the
# test always follows the fitted model -- confirm where 942.53 / 865 come from.
pchisq(942.53, df = 865, lower.tail = FALSE)
## [1] 0.03385231
###############################################################################################################
#                                                                                                             #
#                    Modele 5 : Modélisation descendante avec le critère AIC                                  #
#                               Ajout d'interactions entre variables                                          #
###############################################################################################################

# Model 5: backward stepwise selection on AIC. The starting model is every
# column of train except Date, plus quadratic terms, two pairwise interactions
# and the logical threshold indicators, fitted on train[d, ] as a logistic
# regression.
#
# Every quadratic term must be wrapped in I(): inside a formula `^` is the
# crossing operator, so a bare `(x^2)` collapses to the main effect `x` and
# adds nothing. The wind-speed square was missing its I() and therefore was
# never a candidate term; any summary printed before this fix was produced
# without it.
model5 <- stepAIC(
  glm(
    pluie.demain ~ . - Date +
      I(Temperature.daily.mean..2.m.above.gnd.^2) +
      I(Wind.Speed.daily.mean..900.mb.^2) +
      I(Total.Cloud.Cover.daily.mean..sfc.^2) +
      I(Mean.Sea.Level.Pressure.daily.max..MSL.^2) +
      I(Total.Precipitation.daily.sum..sfc.^2) +
      Wind.Speed.daily.mean..900.mb.:Wind.Direction.daily.mean..900.mb. +
      Relative.Humidity.daily.mean..2.m.above.gnd.:Temperature.daily.mean..2.m.above.gnd. +
      I(Total.Precipitation.daily.sum..sfc. > 1) +
      I(Snowfall.amount.raw.daily.sum..sfc. < 1) +
      I(Total.Cloud.Cover.daily.mean..sfc. > 50) +
      I(Sunshine.Duration.daily.sum..sfc. < 250) +
      I(Wind.Speed.daily.mean..900.mb. < 15) +
      I(Mean.Sea.Level.Pressure.daily.max..MSL. < 1015),
    data = train[d, ],
    family = binomial(link = "logit")
  ),
  direction = "backward",
  trace = FALSE # spelled out: `F` is an ordinary variable that can be reassigned
)
summary(model5)
## 
## Call:
## glm(formula = pluie.demain ~ Temperature.daily.mean..2.m.above.gnd. + 
##     Relative.Humidity.daily.mean..2.m.above.gnd. + Total.Precipitation.daily.sum..sfc. + 
##     Total.Cloud.Cover.daily.mean..sfc. + High.Cloud.Cover.daily.mean..high.cld.lay. + 
##     Low.Cloud.Cover.daily.mean..low.cld.lay. + Wind.Speed.daily.mean..10.m.above.gnd. + 
##     Wind.Direction.daily.mean..10.m.above.gnd. + Wind.Direction.daily.mean..80.m.above.gnd. + 
##     Wind.Speed.daily.mean..900.mb. + Wind.Direction.daily.mean..900.mb. + 
##     Temperature.daily.min..2.m.above.gnd. + Mean.Sea.Level.Pressure.daily.min..MSL. + 
##     Total.Cloud.Cover.daily.min..sfc. + High.Cloud.Cover.daily.min..high.cld.lay. + 
##     Medium.Cloud.Cover.daily.max..mid.cld.lay. + Low.Cloud.Cover.daily.min..low.cld.lay. + 
##     Wind.Speed.daily.min..10.m.above.gnd. + Wind.Speed.daily.min..900.mb. + 
##     Mois + I(Temperature.daily.mean..2.m.above.gnd.^2) + I(Mean.Sea.Level.Pressure.daily.max..MSL.^2) + 
##     I(Total.Precipitation.daily.sum..sfc.^2) + I(Total.Precipitation.daily.sum..sfc. > 
##     1) + I(Sunshine.Duration.daily.sum..sfc. < 250) + Temperature.daily.mean..2.m.above.gnd.:Relative.Humidity.daily.mean..2.m.above.gnd., 
##     family = binomial(link = "logit"), data = train[d, ])
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5470  -0.8390  -0.2663   0.8719   2.2417  
## 
## Coefficients:
##                                                                                       Estimate
## (Intercept)                                                                          8.133e+01
## Temperature.daily.mean..2.m.above.gnd.                                              -1.557e-01
## Relative.Humidity.daily.mean..2.m.above.gnd.                                        -5.178e-02
## Total.Precipitation.daily.sum..sfc.                                                 -1.300e-01
## Total.Cloud.Cover.daily.mean..sfc.                                                  -1.937e-02
## High.Cloud.Cover.daily.mean..high.cld.lay.                                           1.463e-02
## Low.Cloud.Cover.daily.mean..low.cld.lay.                                             1.767e-02
## Wind.Speed.daily.mean..10.m.above.gnd.                                              -1.201e-01
## Wind.Direction.daily.mean..10.m.above.gnd.                                           1.515e-02
## Wind.Direction.daily.mean..80.m.above.gnd.                                          -1.291e-02
## Wind.Speed.daily.mean..900.mb.                                                       3.597e-02
## Wind.Direction.daily.mean..900.mb.                                                   6.610e-03
## Temperature.daily.min..2.m.above.gnd.                                               -2.974e-01
## Mean.Sea.Level.Pressure.daily.min..MSL.                                             -4.810e-02
## Total.Cloud.Cover.daily.min..sfc.                                                    2.773e-02
## High.Cloud.Cover.daily.min..high.cld.lay.                                           -7.282e-02
## Medium.Cloud.Cover.daily.max..mid.cld.lay.                                           1.132e-02
## Low.Cloud.Cover.daily.min..low.cld.lay.                                             -2.523e-02
## Wind.Speed.daily.min..10.m.above.gnd.                                                1.041e-01
## Wind.Speed.daily.min..900.mb.                                                       -2.986e-02
## Mois02                                                                              -1.889e+00
## Mois03                                                                              -1.590e+00
## Mois04                                                                              -1.806e+00
## Mois05                                                                              -1.142e+00
## Mois06                                                                              -8.745e-01
## Mois07                                                                              -1.423e+00
## Mois08                                                                              -2.066e+00
## Mois09                                                                              -1.718e+00
## Mois10                                                                              -1.323e+00
## Mois11                                                                              -1.344e+00
## Mois12                                                                              -5.558e-01
## I(Temperature.daily.mean..2.m.above.gnd.^2)                                          7.794e-03
## I(Mean.Sea.Level.Pressure.daily.max..MSL.^2)                                        -2.950e-05
## I(Total.Precipitation.daily.sum..sfc.^2)                                             5.513e-03
## I(Total.Precipitation.daily.sum..sfc. > 1)TRUE                                       5.483e-01
## I(Sunshine.Duration.daily.sum..sfc. < 250)TRUE                                       5.188e-01
## Temperature.daily.mean..2.m.above.gnd.:Relative.Humidity.daily.mean..2.m.above.gnd.  3.843e-03
##                                                                                     Std. Error
## (Intercept)                                                                          1.784e+01
## Temperature.daily.mean..2.m.above.gnd.                                               1.353e-01
## Relative.Humidity.daily.mean..2.m.above.gnd.                                         2.426e-02
## Total.Precipitation.daily.sum..sfc.                                                  7.583e-02
## Total.Cloud.Cover.daily.mean..sfc.                                                   1.061e-02
## High.Cloud.Cover.daily.mean..high.cld.lay.                                           6.765e-03
## Low.Cloud.Cover.daily.mean..low.cld.lay.                                             8.816e-03
## Wind.Speed.daily.mean..10.m.above.gnd.                                               3.733e-02
## Wind.Direction.daily.mean..10.m.above.gnd.                                           6.534e-03
## Wind.Direction.daily.mean..80.m.above.gnd.                                           6.712e-03
## Wind.Speed.daily.mean..900.mb.                                                       1.649e-02
## Wind.Direction.daily.mean..900.mb.                                                   1.721e-03
## Temperature.daily.min..2.m.above.gnd.                                                8.099e-02
## Mean.Sea.Level.Pressure.daily.min..MSL.                                              2.917e-02
## Total.Cloud.Cover.daily.min..sfc.                                                    9.976e-03
## High.Cloud.Cover.daily.min..high.cld.lay.                                            3.419e-02
## Medium.Cloud.Cover.daily.max..mid.cld.lay.                                           3.112e-03
## Low.Cloud.Cover.daily.min..low.cld.lay.                                              1.036e-02
## Wind.Speed.daily.min..10.m.above.gnd.                                                3.937e-02
## Wind.Speed.daily.min..900.mb.                                                        1.808e-02
## Mois02                                                                               4.796e-01
## Mois03                                                                               5.298e-01
## Mois04                                                                               5.859e-01
## Mois05                                                                               6.031e-01
## Mois06                                                                               6.379e-01
## Mois07                                                                               6.728e-01
## Mois08                                                                               6.683e-01
## Mois09                                                                               6.172e-01
## Mois10                                                                               5.051e-01
## Mois11                                                                               4.669e-01
## Mois12                                                                               4.466e-01
## I(Temperature.daily.mean..2.m.above.gnd.^2)                                          2.012e-03
## I(Mean.Sea.Level.Pressure.daily.max..MSL.^2)                                         1.523e-05
## I(Total.Precipitation.daily.sum..sfc.^2)                                             3.280e-03
## I(Total.Precipitation.daily.sum..sfc. > 1)TRUE                                       3.140e-01
## I(Sunshine.Duration.daily.sum..sfc. < 250)TRUE                                       2.921e-01
## Temperature.daily.mean..2.m.above.gnd.:Relative.Humidity.daily.mean..2.m.above.gnd.  1.510e-03
##                                                                                     z value
## (Intercept)                                                                           4.558
## Temperature.daily.mean..2.m.above.gnd.                                               -1.151
## Relative.Humidity.daily.mean..2.m.above.gnd.                                         -2.134
## Total.Precipitation.daily.sum..sfc.                                                  -1.714
## Total.Cloud.Cover.daily.mean..sfc.                                                   -1.826
## High.Cloud.Cover.daily.mean..high.cld.lay.                                            2.162
## Low.Cloud.Cover.daily.mean..low.cld.lay.                                              2.004
## Wind.Speed.daily.mean..10.m.above.gnd.                                               -3.218
## Wind.Direction.daily.mean..10.m.above.gnd.                                            2.319
## Wind.Direction.daily.mean..80.m.above.gnd.                                           -1.923
## Wind.Speed.daily.mean..900.mb.                                                        2.181
## Wind.Direction.daily.mean..900.mb.                                                    3.840
## Temperature.daily.min..2.m.above.gnd.                                                -3.671
## Mean.Sea.Level.Pressure.daily.min..MSL.                                              -1.649
## Total.Cloud.Cover.daily.min..sfc.                                                     2.779
## High.Cloud.Cover.daily.min..high.cld.lay.                                            -2.130
## Medium.Cloud.Cover.daily.max..mid.cld.lay.                                            3.637
## Low.Cloud.Cover.daily.min..low.cld.lay.                                              -2.434
## Wind.Speed.daily.min..10.m.above.gnd.                                                 2.645
## Wind.Speed.daily.min..900.mb.                                                        -1.652
## Mois02                                                                               -3.939
## Mois03                                                                               -3.001
## Mois04                                                                               -3.082
## Mois05                                                                               -1.893
## Mois06                                                                               -1.371
## Mois07                                                                               -2.115
## Mois08                                                                               -3.092
## Mois09                                                                               -2.784
## Mois10                                                                               -2.619
## Mois11                                                                               -2.878
## Mois12                                                                               -1.244
## I(Temperature.daily.mean..2.m.above.gnd.^2)                                           3.873
## I(Mean.Sea.Level.Pressure.daily.max..MSL.^2)                                         -1.936
## I(Total.Precipitation.daily.sum..sfc.^2)                                              1.681
## I(Total.Precipitation.daily.sum..sfc. > 1)TRUE                                        1.746
## I(Sunshine.Duration.daily.sum..sfc. < 250)TRUE                                        1.776
## Temperature.daily.mean..2.m.above.gnd.:Relative.Humidity.daily.mean..2.m.above.gnd.   2.545
##                                                                                     Pr(>|z|)
## (Intercept)                                                                         5.15e-06
## Temperature.daily.mean..2.m.above.gnd.                                              0.249792
## Relative.Humidity.daily.mean..2.m.above.gnd.                                        0.032852
## Total.Precipitation.daily.sum..sfc.                                                 0.086498
## Total.Cloud.Cover.daily.mean..sfc.                                                  0.067887
## High.Cloud.Cover.daily.mean..high.cld.lay.                                          0.030583
## Low.Cloud.Cover.daily.mean..low.cld.lay.                                            0.045046
## Wind.Speed.daily.mean..10.m.above.gnd.                                              0.001291
## Wind.Direction.daily.mean..10.m.above.gnd.                                          0.020386
## Wind.Direction.daily.mean..80.m.above.gnd.                                          0.054511
## Wind.Speed.daily.mean..900.mb.                                                      0.029169
## Wind.Direction.daily.mean..900.mb.                                                  0.000123
## Temperature.daily.min..2.m.above.gnd.                                               0.000241
## Mean.Sea.Level.Pressure.daily.min..MSL.                                             0.099120
## Total.Cloud.Cover.daily.min..sfc.                                                   0.005448
## High.Cloud.Cover.daily.min..high.cld.lay.                                           0.033200
## Medium.Cloud.Cover.daily.max..mid.cld.lay.                                          0.000276
## Low.Cloud.Cover.daily.min..low.cld.lay.                                             0.014922
## Wind.Speed.daily.min..10.m.above.gnd.                                               0.008173
## Wind.Speed.daily.min..900.mb.                                                       0.098597
## Mois02                                                                              8.19e-05
## Mois03                                                                              0.002694
## Mois04                                                                              0.002059
## Mois05                                                                              0.058314
## Mois06                                                                              0.170420
## Mois07                                                                              0.034468
## Mois08                                                                              0.001988
## Mois09                                                                              0.005365
## Mois10                                                                              0.008818
## Mois11                                                                              0.004001
## Mois12                                                                              0.213351
## I(Temperature.daily.mean..2.m.above.gnd.^2)                                         0.000107
## I(Mean.Sea.Level.Pressure.daily.max..MSL.^2)                                        0.052831
## I(Total.Precipitation.daily.sum..sfc.^2)                                            0.092788
## I(Total.Precipitation.daily.sum..sfc. > 1)TRUE                                      0.080801
## I(Sunshine.Duration.daily.sum..sfc. < 250)TRUE                                      0.075725
## Temperature.daily.mean..2.m.above.gnd.:Relative.Humidity.daily.mean..2.m.above.gnd. 0.010940
##                                                                                        
## (Intercept)                                                                         ***
## Temperature.daily.mean..2.m.above.gnd.                                                 
## Relative.Humidity.daily.mean..2.m.above.gnd.                                        *  
## Total.Precipitation.daily.sum..sfc.                                                 .  
## Total.Cloud.Cover.daily.mean..sfc.                                                  .  
## High.Cloud.Cover.daily.mean..high.cld.lay.                                          *  
## Low.Cloud.Cover.daily.mean..low.cld.lay.                                            *  
## Wind.Speed.daily.mean..10.m.above.gnd.                                              ** 
## Wind.Direction.daily.mean..10.m.above.gnd.                                          *  
## Wind.Direction.daily.mean..80.m.above.gnd.                                          .  
## Wind.Speed.daily.mean..900.mb.                                                      *  
## Wind.Direction.daily.mean..900.mb.                                                  ***
## Temperature.daily.min..2.m.above.gnd.                                               ***
## Mean.Sea.Level.Pressure.daily.min..MSL.                                             .  
## Total.Cloud.Cover.daily.min..sfc.                                                   ** 
## High.Cloud.Cover.daily.min..high.cld.lay.                                           *  
## Medium.Cloud.Cover.daily.max..mid.cld.lay.                                          ***
## Low.Cloud.Cover.daily.min..low.cld.lay.                                             *  
## Wind.Speed.daily.min..10.m.above.gnd.                                               ** 
## Wind.Speed.daily.min..900.mb.                                                       .  
## Mois02                                                                              ***
## Mois03                                                                              ** 
## Mois04                                                                              ** 
## Mois05                                                                              .  
## Mois06                                                                                 
## Mois07                                                                              *  
## Mois08                                                                              ** 
## Mois09                                                                              ** 
## Mois10                                                                              ** 
## Mois11                                                                              ** 
## Mois12                                                                                 
## I(Temperature.daily.mean..2.m.above.gnd.^2)                                         ***
## I(Mean.Sea.Level.Pressure.daily.max..MSL.^2)                                        .  
## I(Total.Precipitation.daily.sum..sfc.^2)                                            .  
## I(Total.Precipitation.daily.sum..sfc. > 1)TRUE                                      .  
## I(Sunshine.Duration.daily.sum..sfc. < 250)TRUE                                      .  
## Temperature.daily.mean..2.m.above.gnd.:Relative.Humidity.daily.mean..2.m.above.gnd. *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1152.01  on 831  degrees of freedom
## Residual deviance:  859.66  on 795  degrees of freedom
## AIC: 933.66
## 
## Number of Fisher Scoring iterations: 5
# Residual-deviance test of model 5 against the saturated model.
print("Test du rapport de vraisemblance du modèle saturé contre le modèle 5")
## [1] "Test du rapport de vraisemblance du modèle saturé contre le modèle 5"
# The p-value improves: at a 10% level this model is acceptable.
# NOTE(review): the deviance (909.75) and degrees of freedom (857) are typed in
# by hand and differ from the summary printed just above (859.66 on 795 df);
# confirm which values are intended.
pchisq(909.75, 857, lower.tail = FALSE)
## [1] 0.1029499
###############################################################################################################
#                                                                                                             #
#                    Model 6: stepwise AIC selection (both directions) from the null model                    #
#                    Upper scope adds interactions, quadratic terms and threshold indicators                  #
###############################################################################################################

# Upper scope of the search: every base predictor, Mois, and the hand-picked
# quadratic terms, interactions and threshold indicators listed below.
formule6 <- as.formula(paste(
  "pluie.demain ~",
  paste(
    c(
      names(train[, predicteurs]),
      "Mois",
      "I(Temperature.daily.mean..2.m.above.gnd.^2)",
      # I() is required: inside a formula a bare (x^2) is read as x, so the
      # quadratic term would silently be dropped.
      "I(Wind.Speed.daily.mean..900.mb.^2)",
      "I(Total.Cloud.Cover.daily.mean..sfc.^2)",
      "I(Mean.Sea.Level.Pressure.daily.max..MSL.^2)",
      "I(Total.Precipitation.daily.sum..sfc.^2)",
      "Wind.Speed.daily.mean..900.mb.:Wind.Direction.daily.mean..900.mb.",
      "Relative.Humidity.daily.mean..2.m.above.gnd.:Temperature.daily.mean..2.m.above.gnd.",
      "I(Total.Precipitation.daily.sum..sfc. > 1)",
      "I(Snowfall.amount.raw.daily.sum..sfc. < 1)",
      "I(Total.Cloud.Cover.daily.mean..sfc. > 50)",
      "I(Sunshine.Duration.daily.sum..sfc. < 250)",
      "I(Wind.Speed.daily.mean..900.mb. < 15)",
      "I(Mean.Sea.Level.Pressure.daily.max..MSL. < 1015)"
    ),
    collapse = " + "
  )
))

# Start from the intercept-only model and let stepAIC add or drop terms within
# the upper scope. NOTE(review): the summary output recorded below was produced
# before the I() fix above and should be regenerated.
model6 <- stepAIC(
  glm(pluie.demain ~ 1, data = train[d, ], family = binomial(link = "logit")),
  scope = list(upper = formule6),
  direction = "both",
  trace = FALSE
)
summary(model6)
## 
## Call:
## glm(formula = pluie.demain ~ Mean.Sea.Level.Pressure.daily.min..MSL. + 
##     Medium.Cloud.Cover.daily.max..mid.cld.lay. + Wind.Direction.daily.mean..900.mb. + 
##     Mois + I(Temperature.daily.mean..2.m.above.gnd.^2) + Temperature.daily.min..2.m.above.gnd. + 
##     I(Sunshine.Duration.daily.sum..sfc. < 250) + I(Mean.Sea.Level.Pressure.daily.max..MSL.^2) + 
##     High.Cloud.Cover.daily.max..high.cld.lay. + Mean.Sea.Level.Pressure.daily.mean..MSL., 
##     family = binomial(link = "logit"), data = train[d, ])
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.3316  -0.8797  -0.3044   0.8635   2.2468  
## 
## Coefficients:
##                                                  Estimate Std. Error
## (Intercept)                                     1.820e+01  3.699e+01
## Mean.Sea.Level.Pressure.daily.min..MSL.        -1.434e-01  7.472e-02
## Medium.Cloud.Cover.daily.max..mid.cld.lay.      7.345e-03  2.973e-03
## Wind.Direction.daily.mean..900.mb.              5.712e-03  1.325e-03
## Mois02                                         -1.448e+00  4.354e-01
## Mois03                                         -9.377e-01  4.415e-01
## Mois04                                         -1.133e+00  4.495e-01
## Mois05                                         -3.984e-01  4.697e-01
## Mois06                                         -2.219e-01  5.050e-01
## Mois07                                         -7.934e-01  5.500e-01
## Mois08                                         -1.289e+00  5.464e-01
## Mois09                                         -1.009e+00  5.063e-01
## Mois10                                         -8.680e-01  4.373e-01
## Mois11                                         -9.277e-01  4.275e-01
## Mois12                                         -6.746e-01  4.360e-01
## I(Temperature.daily.mean..2.m.above.gnd.^2)     6.238e-03  1.350e-03
## Temperature.daily.min..2.m.above.gnd.          -1.595e-01  4.015e-02
## I(Sunshine.Duration.daily.sum..sfc. < 250)TRUE  5.620e-01  2.003e-01
## I(Mean.Sea.Level.Pressure.daily.max..MSL.^2)   -7.961e-05  3.642e-05
## High.Cloud.Cover.daily.max..high.cld.lay.       5.170e-03  2.696e-03
## Mean.Sea.Level.Pressure.daily.mean..MSL.        2.050e-01  1.378e-01
##                                                z value Pr(>|z|)    
## (Intercept)                                      0.492 0.622751    
## Mean.Sea.Level.Pressure.daily.min..MSL.         -1.919 0.055003 .  
## Medium.Cloud.Cover.daily.max..mid.cld.lay.       2.471 0.013484 *  
## Wind.Direction.daily.mean..900.mb.               4.309 1.64e-05 ***
## Mois02                                          -3.325 0.000884 ***
## Mois03                                          -2.124 0.033673 *  
## Mois04                                          -2.520 0.011747 *  
## Mois05                                          -0.848 0.396373    
## Mois06                                          -0.439 0.660395    
## Mois07                                          -1.442 0.149177    
## Mois08                                          -2.360 0.018293 *  
## Mois09                                          -1.992 0.046398 *  
## Mois10                                          -1.985 0.047127 *  
## Mois11                                          -2.170 0.030005 *  
## Mois12                                          -1.547 0.121796    
## I(Temperature.daily.mean..2.m.above.gnd.^2)      4.622 3.80e-06 ***
## Temperature.daily.min..2.m.above.gnd.           -3.972 7.11e-05 ***
## I(Sunshine.Duration.daily.sum..sfc. < 250)TRUE   2.806 0.005010 ** 
## I(Mean.Sea.Level.Pressure.daily.max..MSL.^2)    -2.186 0.028803 *  
## High.Cloud.Cover.daily.max..high.cld.lay.        1.918 0.055156 .  
## Mean.Sea.Level.Pressure.daily.mean..MSL.         1.487 0.136933    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1152.01  on 831  degrees of freedom
## Residual deviance:  897.56  on 811  degrees of freedom
## AIC: 939.56
## 
## Number of Fisher Scoring iterations: 4
# Residual-deviance test of model 6 against the saturated model.
print("Test du rapport de vraisemblance du modèle saturé contre le modèle 6")
## [1] "Test du rapport de vraisemblance du modèle saturé contre le modèle 6"
# The p-value remains modest.
# NOTE(review): the deviance (927.94) and degrees of freedom (862) are typed in
# by hand and differ from summary(model6) above (897.56 on 811 df); consider
# deviance(model6) and df.residual(model6) once the intended values are confirmed.
pchisq(927.94, 862, lower.tail = FALSE)
## [1] 0.05875778
# Drop the formula objects that are no longer needed.
rm(formule, formule4, formule6)

2.5 Discrétisation des variables ?

Les modèles peuvent sembler insuffisants. Une des voies d’amélioration réside dans la discrétisation des variables numériques. La discrétisation peut constituer une réponse à l’absence de lien linéaire entre la variable numérique et la variable réponse.

# No obvious link with the response variable.
rm(q, tempmoy)
###############################################################################################################
#                                 Relative humidity                                                           #
###############################################################################################################
# Decile breaks of the daily mean relative humidity on the training table.
q <- quantile(train$Relative.Humidity.daily.mean..2.m.above.gnd., seq(0, 1, by = 0.1))
# include.lowest keeps the minimum observation in the first bin instead of
# turning it into NA; the full column name avoids `$` partial matching.
qhumiditemoy.2m <- cut(
  train$Relative.Humidity.daily.mean..2.m.above.gnd.,
  q,
  include.lowest = TRUE
)
# Bars: second column of the row-normalised table of bin against pluie.demain.
barplot(
  prop.table(table(qhumiditemoy.2m, train$pluie.demain), 1)[, 2],
  ylim = c(0, 0.8),
  las = 3,
  main = "Relative.Humidity.daily.mean..2.m.above.gnd.",
  ylab = "Jours de pluie",
  density = 0
)
# Dashed reference line: overall share of rows where pluie.demain is TRUE.
abline(h = mean(train$pluie.demain == TRUE), lty = 2)

# Again, no obvious link with the response variable.
rm(q, qhumiditemoy.2m)
###############################################################################################################
#                                 Mean cloud cover (percent)                                                  #
###############################################################################################################
# Decile breaks of the daily mean total cloud cover on the training table.
q <- quantile(train$Total.Cloud.Cover.daily.mean..sfc., seq(0, 1, by = 0.1))
# include.lowest keeps the minimum observation in the first bin instead of NA.
nebulositemoy <- cut(train$Total.Cloud.Cover.daily.mean..sfc., q, include.lowest = TRUE)
# Bars: second column of the row-normalised table of bin against pluie.demain.
barplot(
  prop.table(table(nebulositemoy, train$pluie.demain), 1)[, 2],
  ylim = c(0, 0.8),
  las = 3,
  main = "Total.Cloud.Cover.daily.mean..sfc.",
  ylab = "Jours de pluie",
  density = 0
)
# Dashed reference line: overall share of rows where pluie.demain is TRUE.
abline(h = mean(train$pluie.demain == TRUE), lty = 2)

# Start train2 as a copy of the training table and add a two-level variable
# split at a cloud cover of 63.
train2 <- train
train2$nebulositemoy <- ifelse(
  train$Total.Cloud.Cover.daily.mean..sfc. < 63,
  "Degage ou partiellement couvert",
  "Couvert"
)
rm(q, nebulositemoy)
###############################################################################################################
#                                 Mean sea-level atmospheric pressure                                         #
###############################################################################################################
# Decile breaks of the daily mean sea-level pressure on the training table.
q <- quantile(train$Mean.Sea.Level.Pressure.daily.mean..MSL., seq(0, 1, by = 0.1))
# include.lowest keeps the minimum observation in the first bin instead of NA.
pressionmoy <- cut(train$Mean.Sea.Level.Pressure.daily.mean..MSL., q, include.lowest = TRUE)
# Bars: second column of the row-normalised table of bin against pluie.demain.
barplot(
  prop.table(table(pressionmoy, train$pluie.demain), 1)[, 2],
  ylim = c(0, 0.8),
  las = 3,
  main = "Mean.Sea.Level.Pressure.daily.mean..MSL.",
  ylab = "Jours de pluie",
  density = 0
)
# Dashed reference line: overall share of rows where pluie.demain is TRUE.
abline(h = mean(train$pluie.demain == TRUE), lty = 2)

# New two-level variable in train2, split at 1017 (the threshold the code
# applies; the earlier comment mentioned 1015 hPa - TODO confirm which is meant).
train2$pressionmoy <- ifelse(
  train$Mean.Sea.Level.Pressure.daily.mean..MSL. < 1017,
  "Pression basse et moyenne",
  "Haute pression"
)
rm(q, pressionmoy)
###############################################################################################################
#                                         Daily precipitation                                                 #
###############################################################################################################
# Decile breaks computed on strictly positive precipitation only. The vector is
# subset directly so that quantile() receives a numeric vector and nothing else.
q <- quantile(
  train$Total.Precipitation.daily.sum..sfc.[train$Total.Precipitation.daily.sum..sfc. > 0],
  probs = seq(0, 1, 0.1)
)
# include.lowest keeps the smallest positive value in the first bin; values
# below the lowest break (zeros) stay NA and are left out of the plot.
precipmoy <- cut(train$Total.Precipitation.daily.sum..sfc., q, include.lowest = TRUE)
# Bars: second column of the row-normalised table of bin against pluie.demain.
barplot(
  prop.table(table(precipmoy, train$pluie.demain), 1)[, 2],
  ylim = c(0, 0.8),
  las = 3,
  main = "Total.Precipitation.daily.sum..sfc.",
  ylab = "Jours de pluie",
  density = 0
)
# Dashed reference line: overall share of rows where pluie.demain is TRUE.
abline(h = mean(train$pluie.demain == TRUE), lty = 2)

# New three-level precipitation variable in train2: zero, (0, 0.2], above 0.2.
train2 <- mutate(
  train2,
  precipmoy = case_when(
    Total.Precipitation.daily.sum..sfc. == 0 ~ "Precipit. nulle",
    Total.Precipitation.daily.sum..sfc. > 0 &
      Total.Precipitation.daily.sum..sfc. <= 0.2 ~ "Precip. faible",
    Total.Precipitation.daily.sum..sfc. > 0.2 ~ "Precip. moy. ou forte"
  )
)
rm(q, precipmoy)
###############################################################################################################
#                                         Snowfall                                                            #
###############################################################################################################
# Quartile breaks computed on strictly positive snowfall only.
q <- quantile(
  train$Snowfall.amount.raw.daily.sum..sfc.[train$Snowfall.amount.raw.daily.sum..sfc. > 0],
  probs = seq(0, 1, 0.25)
)
# include.lowest keeps the smallest positive value in the first bin; values
# below the lowest break (zeros) stay NA and are left out of the plot.
neigemoy <- cut(train$Snowfall.amount.raw.daily.sum..sfc., q, include.lowest = TRUE)
# Bars: second column of the row-normalised table of bin against pluie.demain.
barplot(
  prop.table(table(neigemoy, train$pluie.demain), 1)[, 2],
  ylim = c(0, 1),
  las = 3,
  main = "Snowfall.amount.raw.daily.sum..sfc.",
  ylab = "Jours de pluie",
  density = 0
)
# Dashed reference line: overall share of rows where pluie.demain is TRUE.
abline(h = mean(train$pluie.demain == TRUE), lty = 2)

# New two-level snowfall variable in train2: no snow versus any snow.
train2 <- mutate(
  train2,
  neigemoy = case_when(
    Snowfall.amount.raw.daily.sum..sfc. == 0 ~ "Pas de neige",
    Snowfall.amount.raw.daily.sum..sfc. > 0 ~ "Chute de neige"
  )
)
rm(q, neigemoy)
###############################################################################################################
#                                         Sunshine duration                                                   #
###############################################################################################################
# Decile breaks computed on strictly positive sunshine durations only.
q <- quantile(
  train$Sunshine.Duration.daily.sum..sfc.[train$Sunshine.Duration.daily.sum..sfc. > 0],
  probs = seq(0, 1, 0.1)
)
# include.lowest keeps the smallest positive value in the first bin; values
# below the lowest break (zeros) stay NA and are left out of the plot.
soleilmoy <- cut(train$Sunshine.Duration.daily.sum..sfc., q, include.lowest = TRUE)
# Bars: second column of the row-normalised table of bin against pluie.demain.
barplot(
  prop.table(table(soleilmoy, train$pluie.demain), 1)[, 2],
  ylim = c(0, 0.8),
  las = 3,
  main = "Sunshine.Duration.daily.sum..sfc.",
  ylab = "Jours de pluie",
  density = 0
)
# Dashed reference line: overall share of rows where pluie.demain is TRUE.
abline(h = mean(train$pluie.demain == TRUE), lty = 2)

# New three-level sunshine variable in train2: zero, (0, 328], above 328.
train2 <- mutate(
  train2,
  soleilmoy = case_when(
    Sunshine.Duration.daily.sum..sfc. == 0 ~ "Pas d'ensoleillem.",
    Sunshine.Duration.daily.sum..sfc. > 0 &
      Sunshine.Duration.daily.sum..sfc. <= 328 ~ "Peu d'ensoleillem.",
    Sunshine.Duration.daily.sum..sfc. > 328 ~ "Ensoleillem."
  )
)
rm(q, soleilmoy)
###############################################################################################################
#                                         Shortwave radiation                                                 #
###############################################################################################################
# Decile breaks of the daily shortwave radiation sum on the training table.
q <- quantile(train$Shortwave.Radiation.daily.sum..sfc., probs = seq(0, 1, 0.1))
# include.lowest keeps the minimum observation in the first bin instead of NA.
rayonmoy <- cut(train$Shortwave.Radiation.daily.sum..sfc., q, include.lowest = TRUE)
# Bars: second column of the row-normalised table of bin against pluie.demain.
barplot(
  prop.table(table(rayonmoy, train$pluie.demain), 1)[, 2],
  ylim = c(0, 0.8),
  las = 3,
  main = "Shortwave.Radiation.daily.sum..sfc.",
  ylab = "Jours de pluie",
  density = 0,
  cex.axis = .5
)
# Dashed reference line: overall share of rows where pluie.demain is TRUE.
abline(h = mean(train$pluie.demain == TRUE), lty = 2)

# New two-level radiation variable in train2, split at 3182.
train2 <- mutate(
  train2,
  rayonmoy = case_when(
    Shortwave.Radiation.daily.sum..sfc. < 3182 ~ "Faible rayonnement.",
    Shortwave.Radiation.daily.sum..sfc. >= 3182 ~ "Rayonnem. eleve"
  )
)
rm(q, rayonmoy)
###############################################################################################################
#                                         Wind speed                                                          #
###############################################################################################################
# Decile breaks of the daily mean wind speed at 10 m. The column is written
# with its full name (trailing dot included) so `$` does not rely on partial
# matching.
q <- quantile(train$Wind.Speed.daily.mean..10.m.above.gnd., probs = seq(0, 1, 0.1))
# include.lowest keeps the minimum observation in the first bin instead of NA.
ventmoy <- cut(train$Wind.Speed.daily.mean..10.m.above.gnd., q, include.lowest = TRUE)
# Bars: second column of the row-normalised table of bin against pluie.demain.
barplot(
  prop.table(table(ventmoy, train$pluie.demain), 1)[, 2],
  ylim = c(0, 0.8),
  las = 3,
  main = "Wind.Speed.daily.mean..10.m.above.gnd",
  ylab = "Jours de pluie",
  density = 0,
  cex.axis = .5
)
# Dashed reference line: overall share of rows where pluie.demain is TRUE.
abline(h = mean(train$pluie.demain == TRUE), lty = 2)

# The relationship looks fairly linear, so discretising is not a relevant option.
rm(q, ventmoy)
###############################################################################################################
#                                       Wind direction                                                        #
###############################################################################################################
# Decile breaks of the daily mean wind direction at 10 m on the training table.
q <- quantile(train$Wind.Direction.daily.mean..10.m.above.gnd., probs = seq(0, 1, 0.1))
# include.lowest keeps the minimum observation in the first bin instead of NA.
directventmoy <- cut(train$Wind.Direction.daily.mean..10.m.above.gnd., q, include.lowest = TRUE)
# Bars: second column of the row-normalised table of bin against pluie.demain.
barplot(
  prop.table(table(directventmoy, train$pluie.demain), 1)[, 2],
  ylim = c(0, 0.8),
  las = 3,
  main = "Wind.Direction.daily.mean..10.m.above.gnd.",
  ylab = "Jours de pluie",
  density = 0,
  cex.axis = .5
)
# Dashed reference line: overall share of rows where pluie.demain is TRUE.
abline(h = mean(train$pluie.demain == TRUE), lty = 2)

# New two-level wind direction variable in train2, split at 138.
train2 <- mutate(
  train2,
  directventmoy = case_when(
    Wind.Direction.daily.mean..10.m.above.gnd. < 138 ~ "N-SE",
    Wind.Direction.daily.mean..10.m.above.gnd. >= 138 ~ "SE-NO"
  )
)
rm(q, directventmoy)
###############################################################################################################
#                                             Model 7                                                         #
###############################################################################################################
# Backward selection with step() on a logistic model fitted to train2[d, ].
# The starting formula takes every column of train2 (`.`), removes Date and six
# raw columns that have a discretised counterpart in train2, then adds quadratic
# terms and two interactions.
# NOTE(review): Snowfall.amount.raw.daily.sum..sfc. is not removed although
# neigemoy was created from it, and two removed columns come back in squared
# form (cloud cover, precipitation) - confirm this is intended.
model7 <- step(
  glm(
    pluie.demain ~ .
      - Date
      - Total.Cloud.Cover.daily.mean..sfc.
      - Mean.Sea.Level.Pressure.daily.mean..MSL.
      - Total.Precipitation.daily.sum..sfc.
      - Sunshine.Duration.daily.sum..sfc.
      - Shortwave.Radiation.daily.sum..sfc.
      - Wind.Direction.daily.mean..10.m.above.gnd.
      + I(Temperature.daily.mean..2.m.above.gnd.^2)
      + I(Wind.Speed.daily.mean..900.mb.^2)
      + I(Total.Cloud.Cover.daily.mean..sfc.^2)
      + I(Mean.Sea.Level.Pressure.daily.max..MSL.^2)
      + I(Total.Precipitation.daily.sum..sfc.^2)
      + Wind.Speed.daily.mean..900.mb.:Wind.Direction.daily.mean..900.mb.
      + Relative.Humidity.daily.mean..2.m.above.gnd.:Temperature.daily.mean..2.m.above.gnd.,
    data = train2[d, ],
    family = binomial(link = "logit")
  ),
  direction = "backward",
  trace = FALSE
)
summary(model7)
## 
## Call:
## glm(formula = pluie.demain ~ Temperature.daily.mean..2.m.above.gnd. + 
##     Relative.Humidity.daily.mean..2.m.above.gnd. + High.Cloud.Cover.daily.mean..high.cld.lay. + 
##     Low.Cloud.Cover.daily.mean..low.cld.lay. + Wind.Speed.daily.mean..10.m.above.gnd. + 
##     Wind.Direction.daily.mean..900.mb. + Temperature.daily.min..2.m.above.gnd. + 
##     Mean.Sea.Level.Pressure.daily.min..MSL. + Total.Cloud.Cover.daily.min..sfc. + 
##     High.Cloud.Cover.daily.min..high.cld.lay. + Medium.Cloud.Cover.daily.max..mid.cld.lay. + 
##     Low.Cloud.Cover.daily.min..low.cld.lay. + Wind.Speed.daily.min..10.m.above.gnd. + 
##     Wind.Speed.daily.min..900.mb. + Mois + nebulositemoy + directventmoy + 
##     I(Temperature.daily.mean..2.m.above.gnd.^2) + I(Wind.Speed.daily.mean..900.mb.^2) + 
##     I(Total.Cloud.Cover.daily.mean..sfc.^2) + I(Mean.Sea.Level.Pressure.daily.max..MSL.^2) + 
##     Temperature.daily.mean..2.m.above.gnd.:Relative.Humidity.daily.mean..2.m.above.gnd., 
##     family = binomial(link = "logit"), data = train2[d, ])
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5279  -0.8646  -0.2855   0.8477   2.2343  
## 
## Coefficients:
##                                                                                       Estimate
## (Intercept)                                                                          7.741e+01
## Temperature.daily.mean..2.m.above.gnd.                                              -1.320e-01
## Relative.Humidity.daily.mean..2.m.above.gnd.                                        -4.438e-02
## High.Cloud.Cover.daily.mean..high.cld.lay.                                           1.310e-02
## Low.Cloud.Cover.daily.mean..low.cld.lay.                                             1.580e-02
## Wind.Speed.daily.mean..10.m.above.gnd.                                              -8.789e-02
## Wind.Direction.daily.mean..900.mb.                                                   5.981e-03
## Temperature.daily.min..2.m.above.gnd.                                               -3.117e-01
## Mean.Sea.Level.Pressure.daily.min..MSL.                                             -4.448e-02
## Total.Cloud.Cover.daily.min..sfc.                                                    2.855e-02
## High.Cloud.Cover.daily.min..high.cld.lay.                                           -4.834e-02
## Medium.Cloud.Cover.daily.max..mid.cld.lay.                                           9.242e-03
## Low.Cloud.Cover.daily.min..low.cld.lay.                                             -2.201e-02
## Wind.Speed.daily.min..10.m.above.gnd.                                                8.708e-02
## Wind.Speed.daily.min..900.mb.                                                       -3.280e-02
## Mois02                                                                              -1.779e+00
## Mois03                                                                              -1.392e+00
## Mois04                                                                              -1.645e+00
## Mois05                                                                              -9.294e-01
## Mois06                                                                              -7.301e-01
## Mois07                                                                              -1.246e+00
## Mois08                                                                              -1.835e+00
## Mois09                                                                              -1.530e+00
## Mois10                                                                              -1.192e+00
## Mois11                                                                              -1.203e+00
## Mois12                                                                              -5.214e-01
## nebulositemoyDegage ou partiellement couvert                                        -6.283e-01
## directventmoySE-NO                                                                   4.040e-01
## I(Temperature.daily.mean..2.m.above.gnd.^2)                                          7.906e-03
## I(Wind.Speed.daily.mean..900.mb.^2)                                                  4.330e-04
## I(Total.Cloud.Cover.daily.mean..sfc.^2)                                             -1.789e-04
## I(Mean.Sea.Level.Pressure.daily.max..MSL.^2)                                        -2.895e-05
## Temperature.daily.mean..2.m.above.gnd.:Relative.Humidity.daily.mean..2.m.above.gnd.  3.542e-03
##                                                                                     Std. Error
## (Intercept)                                                                          1.726e+01
## Temperature.daily.mean..2.m.above.gnd.                                               1.346e-01
## Relative.Humidity.daily.mean..2.m.above.gnd.                                         2.383e-02
## High.Cloud.Cover.daily.mean..high.cld.lay.                                           6.432e-03
## Low.Cloud.Cover.daily.mean..low.cld.lay.                                             8.004e-03
## Wind.Speed.daily.mean..10.m.above.gnd.                                               3.541e-02
## Wind.Direction.daily.mean..900.mb.                                                   1.491e-03
## Temperature.daily.min..2.m.above.gnd.                                                7.876e-02
## Mean.Sea.Level.Pressure.daily.min..MSL.                                              2.856e-02
## Total.Cloud.Cover.daily.min..sfc.                                                    1.029e-02
## High.Cloud.Cover.daily.min..high.cld.lay.                                            2.968e-02
## Medium.Cloud.Cover.daily.max..mid.cld.lay.                                           2.764e-03
## Low.Cloud.Cover.daily.min..low.cld.lay.                                              9.925e-03
## Wind.Speed.daily.min..10.m.above.gnd.                                                3.807e-02
## Wind.Speed.daily.min..900.mb.                                                        1.758e-02
## Mois02                                                                               4.830e-01
## Mois03                                                                               5.397e-01
## Mois04                                                                               5.899e-01
## Mois05                                                                               6.139e-01
## Mois06                                                                               6.508e-01
## Mois07                                                                               6.798e-01
## Mois08                                                                               6.726e-01
## Mois09                                                                               6.239e-01
## Mois10                                                                               5.216e-01
## Mois11                                                                               4.722e-01
## Mois12                                                                               4.515e-01
## nebulositemoyDegage ou partiellement couvert                                         3.772e-01
## directventmoySE-NO                                                                   2.604e-01
## I(Temperature.daily.mean..2.m.above.gnd.^2)                                          1.970e-03
## I(Wind.Speed.daily.mean..900.mb.^2)                                                  2.150e-04
## I(Total.Cloud.Cover.daily.mean..sfc.^2)                                              1.095e-04
## I(Mean.Sea.Level.Pressure.daily.max..MSL.^2)                                         1.511e-05
## Temperature.daily.mean..2.m.above.gnd.:Relative.Humidity.daily.mean..2.m.above.gnd.  1.499e-03
##                                                                                     z value
## (Intercept)                                                                           4.485
## Temperature.daily.mean..2.m.above.gnd.                                               -0.980
## Relative.Humidity.daily.mean..2.m.above.gnd.                                         -1.863
## High.Cloud.Cover.daily.mean..high.cld.lay.                                            2.036
## Low.Cloud.Cover.daily.mean..low.cld.lay.                                              1.974
## Wind.Speed.daily.mean..10.m.above.gnd.                                               -2.482
## Wind.Direction.daily.mean..900.mb.                                                    4.013
## Temperature.daily.min..2.m.above.gnd.                                                -3.958
## Mean.Sea.Level.Pressure.daily.min..MSL.                                              -1.557
## Total.Cloud.Cover.daily.min..sfc.                                                     2.774
## High.Cloud.Cover.daily.min..high.cld.lay.                                            -1.629
## Medium.Cloud.Cover.daily.max..mid.cld.lay.                                            3.343
## Low.Cloud.Cover.daily.min..low.cld.lay.                                              -2.218
## Wind.Speed.daily.min..10.m.above.gnd.                                                 2.287
## Wind.Speed.daily.min..900.mb.                                                        -1.865
## Mois02                                                                               -3.683
## Mois03                                                                               -2.579
## Mois04                                                                               -2.788
## Mois05                                                                               -1.514
## Mois06                                                                               -1.122
## Mois07                                                                               -1.833
## Mois08                                                                               -2.729
## Mois09                                                                               -2.452
## Mois10                                                                               -2.285
## Mois11                                                                               -2.547
## Mois12                                                                               -1.155
## nebulositemoyDegage ou partiellement couvert                                         -1.666
## directventmoySE-NO                                                                    1.552
## I(Temperature.daily.mean..2.m.above.gnd.^2)                                           4.013
## I(Wind.Speed.daily.mean..900.mb.^2)                                                   2.014
## I(Total.Cloud.Cover.daily.mean..sfc.^2)                                              -1.634
## I(Mean.Sea.Level.Pressure.daily.max..MSL.^2)                                         -1.916
## Temperature.daily.mean..2.m.above.gnd.:Relative.Humidity.daily.mean..2.m.above.gnd.   2.363
##                                                                                     Pr(>|z|)
## (Intercept)                                                                         7.28e-06
## Temperature.daily.mean..2.m.above.gnd.                                              0.326936
## Relative.Humidity.daily.mean..2.m.above.gnd.                                        0.062498
## High.Cloud.Cover.daily.mean..high.cld.lay.                                          0.041743
## Low.Cloud.Cover.daily.mean..low.cld.lay.                                            0.048326
## Wind.Speed.daily.mean..10.m.above.gnd.                                              0.013052
## Wind.Direction.daily.mean..900.mb.                                                  6.01e-05
## Temperature.daily.min..2.m.above.gnd.                                               7.56e-05
## Mean.Sea.Level.Pressure.daily.min..MSL.                                             0.119421
## Total.Cloud.Cover.daily.min..sfc.                                                   0.005538
## High.Cloud.Cover.daily.min..high.cld.lay.                                           0.103376
## Medium.Cloud.Cover.daily.max..mid.cld.lay.                                          0.000828
## Low.Cloud.Cover.daily.min..low.cld.lay.                                             0.026586
## Wind.Speed.daily.min..10.m.above.gnd.                                               0.022187
## Wind.Speed.daily.min..900.mb.                                                       0.062132
## Mois02                                                                              0.000231
## Mois03                                                                              0.009900
## Mois04                                                                              0.005305
## Mois05                                                                              0.130075
## Mois06                                                                              0.261941
## Mois07                                                                              0.066750
## Mois08                                                                              0.006359
## Mois09                                                                              0.014210
## Mois10                                                                              0.022311
## Mois11                                                                              0.010866
## Mois12                                                                              0.248160
## nebulositemoyDegage ou partiellement couvert                                        0.095728
## directventmoySE-NO                                                                  0.120767
## I(Temperature.daily.mean..2.m.above.gnd.^2)                                         5.99e-05
## I(Wind.Speed.daily.mean..900.mb.^2)                                                 0.044060
## I(Total.Cloud.Cover.daily.mean..sfc.^2)                                             0.102335
## I(Mean.Sea.Level.Pressure.daily.max..MSL.^2)                                        0.055383
## Temperature.daily.mean..2.m.above.gnd.:Relative.Humidity.daily.mean..2.m.above.gnd. 0.018136
##                                                                                        
## (Intercept)                                                                         ***
## Temperature.daily.mean..2.m.above.gnd.                                                 
## Relative.Humidity.daily.mean..2.m.above.gnd.                                        .  
## High.Cloud.Cover.daily.mean..high.cld.lay.                                          *  
## Low.Cloud.Cover.daily.mean..low.cld.lay.                                            *  
## Wind.Speed.daily.mean..10.m.above.gnd.                                              *  
## Wind.Direction.daily.mean..900.mb.                                                  ***
## Temperature.daily.min..2.m.above.gnd.                                               ***
## Mean.Sea.Level.Pressure.daily.min..MSL.                                                
## Total.Cloud.Cover.daily.min..sfc.                                                   ** 
## High.Cloud.Cover.daily.min..high.cld.lay.                                              
## Medium.Cloud.Cover.daily.max..mid.cld.lay.                                          ***
## Low.Cloud.Cover.daily.min..low.cld.lay.                                             *  
## Wind.Speed.daily.min..10.m.above.gnd.                                               *  
## Wind.Speed.daily.min..900.mb.                                                       .  
## Mois02                                                                              ***
## Mois03                                                                              ** 
## Mois04                                                                              ** 
## Mois05                                                                                 
## Mois06                                                                                 
## Mois07                                                                              .  
## Mois08                                                                              ** 
## Mois09                                                                              *  
## Mois10                                                                              *  
## Mois11                                                                              *  
## Mois12                                                                                 
## nebulositemoyDegage ou partiellement couvert                                        .  
## directventmoySE-NO                                                                     
## I(Temperature.daily.mean..2.m.above.gnd.^2)                                         ***
## I(Wind.Speed.daily.mean..900.mb.^2)                                                 *  
## I(Total.Cloud.Cover.daily.mean..sfc.^2)                                                
## I(Mean.Sea.Level.Pressure.daily.max..MSL.^2)                                        .  
## Temperature.daily.mean..2.m.above.gnd.:Relative.Humidity.daily.mean..2.m.above.gnd. *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1152.01  on 831  degrees of freedom
## Residual deviance:  866.85  on 799  degrees of freedom
## AIC: 932.85
## 
## Number of Fisher Scoring iterations: 5
# Deviance goodness-of-fit test (saturated model vs. model 7): upper-tail
# chi-square probability of the deviance statistic for the given degrees of
# freedom.
# NOTE(review): the statistic and degrees of freedom are hard-coded
# (898.08 on 853 df) and do not match the residual deviance in the summary
# printed just above (866.85 on 799 df); they presumably come from an earlier
# run. Consider
#   pchisq(deviance(model7), df.residual(model7), lower.tail = FALSE)
# and re-check the conclusion -- to be confirmed.
print("Test du rapport de vraisemblance du modèle saturé contre le modèle 7")
## [1] "Test du rapport de vraisemblance du modèle saturé contre le modèle 7"
# `lower.tail` is spelled out in full (no partial argument matching) and uses
# FALSE rather than the reassignable shortcut F.
pchisq(898.08, 853, lower.tail = FALSE)
## [1] 0.1381116
# La significativité globale du modèle s'améliore un peu, tout comme l'AIC.
###############################################################################################################
#                                             Modele 8                                                        #
############################################################################################################### 
# Pour pallier les éventuels problèmes de colinéarité entre variables, nous choisissons de calculer les
# amplitudes entre valeurs minimales et maximales au cours de la journée pour plusieurs variables.

# Daily amplitudes: for each variable below, the new train2 column holds the
# daily maximum minus the daily minimum.

# Temperature (2 m above ground)
train2[["amplitud_temp.2m.gnd"]] <-
  train2[["Temperature.daily.max..2.m.above.gnd."]] -
  train2[["Temperature.daily.min..2.m.above.gnd."]]

# Relative humidity (2 m above ground)
train2[["amplitud_humid.2m.gnd"]] <-
  train2[["Relative.Humidity.daily.max..2.m.above.gnd."]] -
  train2[["Relative.Humidity.daily.min..2.m.above.gnd."]]

# Wind speed (10 m above ground)
train2[["amplitud_vitesse_vent.10.m"]] <-
  train2[["Wind.Speed.daily.max..10.m.above.gnd."]] -
  train2[["Wind.Speed.daily.min..10.m.above.gnd."]]

# Mean sea level pressure
train2[["amplitud_pression"]] <-
  train2[["Mean.Sea.Level.Pressure.daily.max..MSL."]] -
  train2[["Mean.Sea.Level.Pressure.daily.min..MSL."]]

# Total cloud cover
train2[["amplitud_nebulosite"]] <-
  train2[["Total.Cloud.Cover.daily.max..sfc."]] -
  train2[["Total.Cloud.Cover.daily.min..sfc."]]

# Wind gusts
train2[["amplitud_rafale"]] <-
  train2[["Wind.Gust.daily.max..sfc."]] -
  train2[["Wind.Gust.daily.min..sfc."]]

# Model 8: logistic regression of pluie.demain fitted on train2[d, ], then
# reduced by backward stepwise selection with step() (default criterion: AIC).
#
# The starting formula takes every column of train2 (`.`), removes the raw
# daily min/max columns and several daily means/sums, and adds four squared
# terms plus two interactions. The amplitud_* columns computed above stay in
# the candidate set through `.`.
modele8_complet <- glm(
  pluie.demain ~ .
  - Date
  - Total.Cloud.Cover.daily.mean..sfc.
  - Mean.Sea.Level.Pressure.daily.mean..MSL.
  - Total.Precipitation.daily.sum..sfc.
  - Sunshine.Duration.daily.sum..sfc.
  - Shortwave.Radiation.daily.sum..sfc.
  - Wind.Direction.daily.mean..10.m.above.gnd.
  - Wind.Direction.daily.mean..80.m.above.gnd.
  - Wind.Direction.daily.mean..900.mb.
  - Temperature.daily.max..2.m.above.gnd.
  - Temperature.daily.min..2.m.above.gnd.
  - Relative.Humidity.daily.max..2.m.above.gnd.
  - Relative.Humidity.daily.min..2.m.above.gnd.
  - Wind.Speed.daily.max..10.m.above.gnd.
  - Wind.Speed.daily.min..10.m.above.gnd.
  - Wind.Speed.daily.max..80.m.above.gnd.
  - Wind.Speed.daily.min..80.m.above.gnd.
  - Wind.Speed.daily.max..900.mb.
  - Wind.Speed.daily.min..900.mb.
  - Wind.Speed.daily.mean..80.m.above.gnd.
  - Mean.Sea.Level.Pressure.daily.max..MSL.
  - Mean.Sea.Level.Pressure.daily.min..MSL.
  - Total.Cloud.Cover.daily.max..sfc.
  - Total.Cloud.Cover.daily.min..sfc.
  - High.Cloud.Cover.daily.max..high.cld.lay.
  - High.Cloud.Cover.daily.min..high.cld.lay.
  - High.Cloud.Cover.daily.mean..high.cld.lay.
  - Medium.Cloud.Cover.daily.max..mid.cld.lay.
  - Medium.Cloud.Cover.daily.min..mid.cld.lay.
  - Medium.Cloud.Cover.daily.mean..mid.cld.lay.
  - Low.Cloud.Cover.daily.max..low.cld.lay.
  - Low.Cloud.Cover.daily.min..low.cld.lay.
  - Low.Cloud.Cover.daily.mean..low.cld.lay.
  - Wind.Gust.daily.max..sfc.
  - Wind.Gust.daily.min..sfc.
  + I(Temperature.daily.mean..2.m.above.gnd.^2)
  + I(Wind.Speed.daily.mean..900.mb.^2)
  + I(Total.Cloud.Cover.daily.mean..sfc.^2)
  + I(Mean.Sea.Level.Pressure.daily.max..MSL.^2)
  + I(Total.Precipitation.daily.sum..sfc.^2)
  + Wind.Speed.daily.mean..900.mb.:Wind.Direction.daily.mean..900.mb.
  + Relative.Humidity.daily.mean..2.m.above.gnd.:Temperature.daily.mean..2.m.above.gnd.,
  data = train2[d, ],
  family = binomial(link = "logit")
)

# Backward elimination; trace = FALSE suppresses the step-by-step log.
model8 <- step(modele8_complet, direction = "backward", trace = FALSE)
summary(model8)
## 
## Call:
## glm(formula = pluie.demain ~ Temperature.daily.mean..2.m.above.gnd. + 
##     Relative.Humidity.daily.mean..2.m.above.gnd. + Wind.Speed.daily.mean..10.m.above.gnd. + 
##     Wind.Speed.daily.mean..900.mb. + Mois + precipmoy + directventmoy + 
##     amplitud_temp.2m.gnd + amplitud_pression + I(Temperature.daily.mean..2.m.above.gnd.^2) + 
##     I(Total.Cloud.Cover.daily.mean..sfc.^2) + I(Mean.Sea.Level.Pressure.daily.max..MSL.^2) + 
##     Wind.Speed.daily.mean..900.mb.:Wind.Direction.daily.mean..900.mb. + 
##     Temperature.daily.mean..2.m.above.gnd.:Relative.Humidity.daily.mean..2.m.above.gnd., 
##     family = binomial(link = "logit"), data = train2[d, ])
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2823  -0.8942  -0.3373   0.8928   2.5469  
## 
## Coefficients:
##                                                                                       Estimate
## (Intercept)                                                                          5.768e+01
## Temperature.daily.mean..2.m.above.gnd.                                              -4.880e-01
## Relative.Humidity.daily.mean..2.m.above.gnd.                                        -5.161e-02
## Wind.Speed.daily.mean..10.m.above.gnd.                                              -5.719e-02
## Wind.Speed.daily.mean..900.mb.                                                      -4.148e-02
## Mois02                                                                              -1.683e+00
## Mois03                                                                              -1.437e+00
## Mois04                                                                              -1.521e+00
## Mois05                                                                              -8.849e-01
## Mois06                                                                              -6.601e-01
## Mois07                                                                              -1.280e+00
## Mois08                                                                              -1.779e+00
## Mois09                                                                              -1.376e+00
## Mois10                                                                              -1.192e+00
## Mois11                                                                              -9.747e-01
## Mois12                                                                              -5.874e-01
## precipmoyPrecip. moy. ou forte                                                       1.844e-01
## precipmoyPrecipit. nulle                                                            -3.929e-01
## directventmoySE-NO                                                                   4.054e-01
## amplitud_temp.2m.gnd                                                                 1.814e-01
## amplitud_pression                                                                    4.259e-02
## I(Temperature.daily.mean..2.m.above.gnd.^2)                                          8.650e-03
## I(Total.Cloud.Cover.daily.mean..sfc.^2)                                              1.590e-04
## I(Mean.Sea.Level.Pressure.daily.max..MSL.^2)                                        -5.209e-05
## Wind.Speed.daily.mean..900.mb.:Wind.Direction.daily.mean..900.mb.                    2.361e-04
## Temperature.daily.mean..2.m.above.gnd.:Relative.Humidity.daily.mean..2.m.above.gnd.  3.843e-03
##                                                                                     Std. Error
## (Intercept)                                                                          7.631e+00
## Temperature.daily.mean..2.m.above.gnd.                                               1.257e-01
## Relative.Humidity.daily.mean..2.m.above.gnd.                                         2.219e-02
## Wind.Speed.daily.mean..10.m.above.gnd.                                               2.753e-02
## Wind.Speed.daily.mean..900.mb.                                                       1.624e-02
## Mois02                                                                               4.652e-01
## Mois03                                                                               4.963e-01
## Mois04                                                                               5.465e-01
## Mois05                                                                               5.581e-01
## Mois06                                                                               5.943e-01
## Mois07                                                                               6.220e-01
## Mois08                                                                               6.185e-01
## Mois09                                                                               5.809e-01
## Mois10                                                                               4.885e-01
## Mois11                                                                               4.469e-01
## Mois12                                                                               4.333e-01
## precipmoyPrecip. moy. ou forte                                                       3.206e-01
## precipmoyPrecipit. nulle                                                             3.267e-01
## directventmoySE-NO                                                                   2.424e-01
## amplitud_temp.2m.gnd                                                                 4.199e-02
## amplitud_pression                                                                    2.694e-02
## I(Temperature.daily.mean..2.m.above.gnd.^2)                                          1.998e-03
## I(Total.Cloud.Cover.daily.mean..sfc.^2)                                              4.039e-05
## I(Mean.Sea.Level.Pressure.daily.max..MSL.^2)                                         6.930e-06
## Wind.Speed.daily.mean..900.mb.:Wind.Direction.daily.mean..900.mb.                    6.073e-05
## Temperature.daily.mean..2.m.above.gnd.:Relative.Humidity.daily.mean..2.m.above.gnd.  1.413e-03
##                                                                                     z value
## (Intercept)                                                                           7.558
## Temperature.daily.mean..2.m.above.gnd.                                               -3.882
## Relative.Humidity.daily.mean..2.m.above.gnd.                                         -2.325
## Wind.Speed.daily.mean..10.m.above.gnd.                                               -2.078
## Wind.Speed.daily.mean..900.mb.                                                       -2.555
## Mois02                                                                               -3.617
## Mois03                                                                               -2.895
## Mois04                                                                               -2.783
## Mois05                                                                               -1.586
## Mois06                                                                               -1.111
## Mois07                                                                               -2.057
## Mois08                                                                               -2.876
## Mois09                                                                               -2.369
## Mois10                                                                               -2.440
## Mois11                                                                               -2.181
## Mois12                                                                               -1.356
## precipmoyPrecip. moy. ou forte                                                        0.575
## precipmoyPrecipit. nulle                                                             -1.203
## directventmoySE-NO                                                                    1.673
## amplitud_temp.2m.gnd                                                                  4.321
## amplitud_pression                                                                     1.581
## I(Temperature.daily.mean..2.m.above.gnd.^2)                                           4.330
## I(Total.Cloud.Cover.daily.mean..sfc.^2)                                               3.938
## I(Mean.Sea.Level.Pressure.daily.max..MSL.^2)                                         -7.516
## Wind.Speed.daily.mean..900.mb.:Wind.Direction.daily.mean..900.mb.                     3.888
## Temperature.daily.mean..2.m.above.gnd.:Relative.Humidity.daily.mean..2.m.above.gnd.   2.719
##                                                                                     Pr(>|z|)
## (Intercept)                                                                         4.09e-14
## Temperature.daily.mean..2.m.above.gnd.                                              0.000104
## Relative.Humidity.daily.mean..2.m.above.gnd.                                        0.020047
## Wind.Speed.daily.mean..10.m.above.gnd.                                              0.037724
## Wind.Speed.daily.mean..900.mb.                                                      0.010628
## Mois02                                                                              0.000297
## Mois03                                                                              0.003795
## Mois04                                                                              0.005385
## Mois05                                                                              0.112842
## Mois06                                                                              0.266725
## Mois07                                                                              0.039647
## Mois08                                                                              0.004023
## Mois09                                                                              0.017825
## Mois10                                                                              0.014694
## Mois11                                                                              0.029178
## Mois12                                                                              0.175159
## precipmoyPrecip. moy. ou forte                                                      0.565285
## precipmoyPrecipit. nulle                                                            0.229072
## directventmoySE-NO                                                                  0.094394
## amplitud_temp.2m.gnd                                                                1.55e-05
## amplitud_pression                                                                   0.113848
## I(Temperature.daily.mean..2.m.above.gnd.^2)                                         1.49e-05
## I(Total.Cloud.Cover.daily.mean..sfc.^2)                                             8.23e-05
## I(Mean.Sea.Level.Pressure.daily.max..MSL.^2)                                        5.64e-14
## Wind.Speed.daily.mean..900.mb.:Wind.Direction.daily.mean..900.mb.                   0.000101
## Temperature.daily.mean..2.m.above.gnd.:Relative.Humidity.daily.mean..2.m.above.gnd. 0.006544
##                                                                                        
## (Intercept)                                                                         ***
## Temperature.daily.mean..2.m.above.gnd.                                              ***
## Relative.Humidity.daily.mean..2.m.above.gnd.                                        *  
## Wind.Speed.daily.mean..10.m.above.gnd.                                              *  
## Wind.Speed.daily.mean..900.mb.                                                      *  
## Mois02                                                                              ***
## Mois03                                                                              ** 
## Mois04                                                                              ** 
## Mois05                                                                                 
## Mois06                                                                                 
## Mois07                                                                              *  
## Mois08                                                                              ** 
## Mois09                                                                              *  
## Mois10                                                                              *  
## Mois11                                                                              *  
## Mois12                                                                                 
## precipmoyPrecip. moy. ou forte                                                         
## precipmoyPrecipit. nulle                                                               
## directventmoySE-NO                                                                  .  
## amplitud_temp.2m.gnd                                                                ***
## amplitud_pression                                                                      
## I(Temperature.daily.mean..2.m.above.gnd.^2)                                         ***
## I(Total.Cloud.Cover.daily.mean..sfc.^2)                                             ***
## I(Mean.Sea.Level.Pressure.daily.max..MSL.^2)                                        ***
## Wind.Speed.daily.mean..900.mb.:Wind.Direction.daily.mean..900.mb.                   ***
## Temperature.daily.mean..2.m.above.gnd.:Relative.Humidity.daily.mean..2.m.above.gnd. ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1152.01  on 831  degrees of freedom
## Residual deviance:  902.57  on 806  degrees of freedom
## AIC: 954.57
## 
## Number of Fisher Scoring iterations: 4
# Deviance goodness-of-fit test (saturated model vs. model 8): upper-tail
# chi-square probability of the deviance statistic for the given degrees of
# freedom.
# The printed label used to say "modèle 7" (copy-paste from the previous
# section); this test concerns model 8.
# NOTE(review): the statistic and degrees of freedom are hard-coded
# (925.23 on 858 df) and do not match the residual deviance in the
# summary(model8) output above (902.57 on 806 df); they presumably come from
# an earlier run. Consider
#   pchisq(deviance(model8), df.residual(model8), lower.tail = FALSE)
# and re-check the conclusion -- to be confirmed.
print("Test du rapport de vraisemblance du modèle saturé contre le modèle 8")
## [1] "Test du rapport de vraisemblance du modèle saturé contre le modèle 8"
# `lower.tail` is spelled out in full (no partial argument matching) and uses
# FALSE rather than the reassignable shortcut F.
pchisq(925.23, 858, lower.tail = FALSE)
## [1] 0.05497837
# Le modèle est juste au-dessus du seuil de significativité globale à 5 %. Mais il présente l'avantage d'avoir moins, ou peu, de prédicteurs corrélés et d'être en outre plus parcimonieux.

3 Adéquation

3.1 Courbes de ROC

Au regard des AUC, relativement proches entre modèles (de 79,91 % à 81,90 %), les modèles 5 et 7 présentent les meilleurs résultats (81,90 % et 81,66 % respectivement).

###############################################################################################################
#                               Courbes de ROC des modèles 2, 5, 7 et 8                                       #
###############################################################################################################

#Courbe de ROC des 4 modèles : 2, 5, 7 et 8

# Predicted probabilities (type = "response") of models 2, 5, 7 and 8 on
# train2[d, ].
# NOTE(review): train2[d, ] is also the data model8 was fitted on (see its
# glm call), so the ROC curves/AUC below look in-sample and may be optimistic.
# Evaluating on held-out rows (e.g. train2[-d, ]) would be more informative --
# confirm the intent.
predM2 <- predict(model2, newdata = train2[d, ], type = "response")
predM5 <- predict(model5, newdata = train2[d, ], type = "response")
predM7 <- predict(model7, newdata = train2[d, ], type = "response")
predM8 <- predict(model8, newdata = train2[d, ], type = "response")

# Square plotting region for the ROC curves.
par(pty = "s")
# Model 2: ROC curve drawn in percent with the AUC printed on the plot.
roc(train2$pluie.demain[d], predM2,
    plot = TRUE, print.auc = TRUE, legacy.axes = TRUE, percent = TRUE,
    xlab = "% faux positifs", ylab = "% vrais positifs")

## 
## Call:
## roc.default(response = train2$pluie.demain[d], predictor = predM2,     percent = TRUE, plot = TRUE, print.auc = TRUE, legacy.axes = TRUE, xlab = "% faux positifs",     ylab = "% vrais positifs")
## 
## Data: predM2 in 433 controls (train2$pluie.demain[d] FALSE) < 399 cases (train2$pluie.demain[d] TRUE).
## Area under the curve: 80.65%
# Model 5: ROC curve on train2[d, ], drawn in percent with the AUC printed on
# the plot. TRUE is written out instead of the reassignable shortcut T.
roc(train2$pluie.demain[d], predM5,
    plot = TRUE, print.auc = TRUE, legacy.axes = TRUE, percent = TRUE,
    xlab = "% faux positifs", ylab = "% vrais positifs")

## 
## Call:
## roc.default(response = train2$pluie.demain[d], predictor = predM5,     percent = TRUE, plot = TRUE, print.auc = TRUE, legacy.axes = TRUE, xlab = "% faux positifs",     ylab = "% vrais positifs")
## 
## Data: predM5 in 433 controls (train2$pluie.demain[d] FALSE) < 399 cases (train2$pluie.demain[d] TRUE).
## Area under the curve: 81.9%
# Model 7: ROC curve on train2[d, ], drawn in percent with the AUC printed on
# the plot. TRUE is written out instead of the reassignable shortcut T.
roc(train2$pluie.demain[d], predM7,
    plot = TRUE, print.auc = TRUE, legacy.axes = TRUE, percent = TRUE,
    xlab = "% faux positifs", ylab = "% vrais positifs")

## 
## Call:
## roc.default(response = train2$pluie.demain[d], predictor = predM7,     percent = TRUE, plot = TRUE, print.auc = TRUE, legacy.axes = TRUE, xlab = "% faux positifs",     ylab = "% vrais positifs")
## 
## Data: predM7 in 433 controls (train2$pluie.demain[d] FALSE) < 399 cases (train2$pluie.demain[d] TRUE).
## Area under the curve: 81.66%
# Model 8: ROC curve on train2[d, ], drawn in percent with the AUC printed on
# the plot. TRUE is written out instead of the reassignable shortcut T.
roc(train2$pluie.demain[d], predM8,
    plot = TRUE, print.auc = TRUE, legacy.axes = TRUE, percent = TRUE,
    xlab = "% faux positifs", ylab = "% vrais positifs")

## 
## Call:
## roc.default(response = train2$pluie.demain[d], predictor = predM8,     percent = TRUE, plot = TRUE, print.auc = TRUE, legacy.axes = TRUE, xlab = "% faux positifs",     ylab = "% vrais positifs")
## 
## Data: predM8 in 433 controls (train2$pluie.demain[d] FALSE) < 399 cases (train2$pluie.demain[d] TRUE).
## Area under the curve: 79.91%

3.2 Validation croisée

###############################################################################################################
#                               Threshold selection for model 5                                               #
###############################################################################################################
# NOTE(review): the beginning of this block was lost in the rendered document
# (only its tail survived); it is reconstructed here from the identical
# model 7 and model 8 blocks below -- confirm against the original script.
#
# Grid search over classification thresholds: for each candidate threshold,
# compute the mean absolute difference between the thresholded prediction and
# the observed pluie.demain on train2[d, ] (i.e. the misclassification rate),
# then report the threshold with the smallest error.
seuil <- seq(0, 1, by = 0.00001)
obs_train <- train2[d, "pluie.demain"]
res <- vapply(
  seuil,
  function(s) mean(abs((predM5 >= s) - obs_train), na.rm = TRUE),
  numeric(1)
)
seuil[which.min(res)]
## [1] 0.54041

# Threshold retained by the author: 55 % (the recorded search above returned
# 0.54041).
preditM5 <- (predM5 >= 0.55)
print("Matrice de confusion du modèle 5 au seuil de 55 %")
## [1] "Matrice de confusion du modèle 5 au seuil de 55 %"
table("PREDITS" = preditM5, "OBSERVES" = train2$pluie.demain[d])
##        OBSERVES
## PREDITS FALSE TRUE
##   FALSE   345  128
##   TRUE     88  271
mean(abs(preditM5 - train2[d, "pluie.demain"]), na.rm = TRUE)
## [1] 0.2596154
# Recorded run: error rate 0.26, i.e. about 74 % of days correctly classified;
# 271 of the 399 observed rainy days (about 68 %) are predicted as rainy.

###############################################################################################################
#                               Threshold selection for model 7                                               #
###############################################################################################################
# Grid search over classification thresholds: for each candidate threshold,
# compute the mean absolute difference between the thresholded prediction and
# the observed pluie.demain on train2[d, ] (i.e. the misclassification rate),
# then report the threshold with the smallest error.
seuil <- seq(0, 1, by = 0.00001)
obs_train <- train2[d, "pluie.demain"]
res <- vapply(
  seuil,
  function(s) mean(abs((predM7 >= s) - obs_train), na.rm = TRUE),
  numeric(1)
)
seuil[which.min(res)]
## [1] 0.57366

# Threshold retained by the author: 47 %.
# NOTE(review): the recorded search above returned 0.57366, not 0.47; the
# hard-coded value presumably comes from an earlier run. Re-run and align the
# threshold (here and in the export step) -- to be confirmed.
preditM7 <- (predM7 >= 0.47)
print("Matrice de confusion du modèle 7 au seuil de 47 %")
## [1] "Matrice de confusion du modèle 7 au seuil de 47 %"
table("PREDITS" = preditM7, "OBSERVES" = train2$pluie.demain[d])
##        OBSERVES
## PREDITS FALSE TRUE
##   FALSE   306  103
##   TRUE    127  296
mean(abs(preditM7 - train2[d, "pluie.demain"]), na.rm = TRUE)
## [1] 0.2764423
# Recorded run: error rate 0.28, i.e. about 72 % of days correctly classified;
# 296 of the 399 observed rainy days (about 74 %) are predicted as rainy.

###############################################################################################################
#                               Threshold selection for model 8                                               #
###############################################################################################################
# Grid search over classification thresholds: for each candidate threshold,
# compute the mean absolute difference between the thresholded prediction and
# the observed pluie.demain on train2[d, ] (i.e. the misclassification rate),
# then report the threshold with the smallest error.
seuil <- seq(0, 1, by = 0.00001)
obs_train <- train2[d, "pluie.demain"]
res <- vapply(
  seuil,
  function(s) mean(abs((predM8 >= s) - obs_train), na.rm = TRUE),
  numeric(1)
)
seuil[which.min(res)]
## [1] 0.60976

# Threshold retained by the author: 45 %.
# NOTE(review): the recorded search above returned 0.60976, not 0.45; the
# hard-coded value presumably comes from an earlier run. Re-run and align the
# threshold -- to be confirmed.
preditM8 <- (predM8 >= 0.45)
print("Matrice de confusion du modèle 8 au seuil de 45 %")
## [1] "Matrice de confusion du modèle 8 au seuil de 45 %"
table("PREDITS" = preditM8, "OBSERVES" = train2$pluie.demain[d])
##        OBSERVES
## PREDITS FALSE TRUE
##   FALSE   293   97
##   TRUE    140  302
mean(abs(preditM8 - train2[d, "pluie.demain"]), na.rm = TRUE)
## [1] 0.2848558
# Recorded run: error rate 0.28, i.e. about 72 % of days correctly classified;
# 302 of the 399 observed rainy days (about 76 %) are predicted as rainy.
# The false-positive rate is noticeably worse, however: 140 of the 433
# observed dry days (about 32 %) are wrongly predicted as rainy.

3.4 Prédiction et export de la table

# Build the discretised (categorical) predictors on the TEST data set.
# Each new column is NA wherever its source column is NA.

# Cloud cover class: split on a daily mean total cloud cover of 63.
test$nebulositemoy <- ifelse(
  test$Total.Cloud.Cover.daily.mean..sfc. < 63,
  "Degage ou partiellement couvert",
  "Couvert"
)

# Pressure class: split on a daily mean sea level pressure of 1017.
test$pressionmoy <- ifelse(
  test$Mean.Sea.Level.Pressure.daily.mean..MSL. < 1017,
  "Pression basse et moyenne",
  "Haute pression"
)

# Remaining classes, added in the same column order as before:
# precipitation, snowfall, sunshine duration, shortwave radiation and
# wind direction.
test <- test %>%
  mutate(
    precipmoy = case_when(
      Total.Precipitation.daily.sum..sfc. == 0 ~ "Precipit. nulle",
      Total.Precipitation.daily.sum..sfc. > 0 &
        Total.Precipitation.daily.sum..sfc. <= 0.2 ~ "Precip. faible",
      Total.Precipitation.daily.sum..sfc. > 0.2 ~ "Precip. moy. ou forte"
    ),
    neigemoy = case_when(
      Snowfall.amount.raw.daily.sum..sfc. == 0 ~ "Pas de neige",
      Snowfall.amount.raw.daily.sum..sfc. > 0 ~ "Chute de neige"
    ),
    soleilmoy = case_when(
      Sunshine.Duration.daily.sum..sfc. == 0 ~ "Pas d'ensoleillem.",
      Sunshine.Duration.daily.sum..sfc. > 0 &
        Sunshine.Duration.daily.sum..sfc. <= 328 ~ "Peu d'ensoleillem.",
      Sunshine.Duration.daily.sum..sfc. > 328 ~ "Ensoleillem."
    ),
    rayonmoy = case_when(
      Shortwave.Radiation.daily.sum..sfc. < 3182 ~ "Faible rayonnement.",
      Shortwave.Radiation.daily.sum..sfc. >= 3182 ~ "Rayonnem. eleve"
    ),
    directventmoy = case_when(
      Wind.Direction.daily.mean..10.m.above.gnd. < 138 ~ "N-SE",
      Wind.Direction.daily.mean..10.m.above.gnd. >= 138 ~ "SE-NO"
    )
  )

# Prediction with model 7 on the test set, then export to CSV.
# The estimated probability is appended to `test` as column Proba_estimee.
# NOTE(review): the coefficient table printed before the model 7 test lists
# Mois, nebulositemoy and directventmoy terms; only the last two are created
# on `test` in the step above -- presumably `test` already has a Mois column;
# verify.
resultat <- cbind(
  test,
  Proba_estimee = predict(model7, newdata = test, type = "response")
)

# Rain is predicted when the estimated probability reaches the 47 % threshold.
# `>=` matches the rule used for the model 7 confusion matrix
# (predM7 >= 0.47); the comparison already yields a logical, so no ifelse()
# is needed.
# NOTE(review): the recorded threshold search for model 7 returned 0.57366,
# not 0.47 -- confirm which threshold is intended.
resultat$pluie.lendemain <- resultat$Proba_estimee >= 0.47

write.csv(resultat, file = "resultat.csv", row.names = FALSE)